diff --git a/.clang-format b/.clang-format
old mode 100644
new mode 100755
diff --git a/.gitattributes b/.gitattributes
old mode 100644
new mode 100755
diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md
old mode 100644
new mode 100755
diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md
old mode 100644
new mode 100755
diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml
new file mode 100755
index 00000000..20035f68
--- /dev/null
+++ b/.github/workflows/bench.yml
@@ -0,0 +1,196 @@
+# .github/workflows/bench.yml
+#
+# Runs the Buddy benchmarks on the self-hosted runner, renders the logs into a
+# static mini-site and publishes it to buddy-compiler/buddy-compiler.github.io.
+#
+# NOTE(review): pull_request runs also execute on the self-hosted runner and
+# reach the publishing step, which needs the BUDDY_SITE_PAT secret -- confirm
+# this is intended, in particular for pull requests from forks.
+name: Buddy-Benchmark CI
+
+on:
+  push:  # fire on any branch
+    branches: ['**']
+  pull_request:
+  workflow_dispatch:
+    inputs:
+      upstream_repo:
+        description: "Which repo changed (mlir|benchmark)"
+        required: false
+      upstream_sha:
+        description: "Upstream commit SHA for result folder"
+        required: false
+
+jobs:
+  bench:
+    runs-on: self-hosted
+    permissions:
+      contents: read
+      pages: write
+      id-token: write
+
+    steps:
+      # ------------------------------------------------------------
+      # 1) fast-forward the two local clones to the tip of the branch
+      #    each one tracks (fast-forward only for safety)
+      # ------------------------------------------------------------
+      - name: Update local clones
+        run: |
+          set -e
+          for dir in buddy-benchmark buddy-mlir; do
+            cd /home/quliu/buddy-complier-workspace/$dir
+            git remote update
+            git pull --ff-only
+          done
+
+      # ------------------------------------------------------------
+      # 2) run everything in the Docker sandbox
+      #    NOTE(review): the run is skipped whenever test_result already
+      #    holds files, so results from an earlier commit may be published
+      #    under the new SHA -- confirm something clears test_result.
+      # ------------------------------------------------------------
+      - name: Build & run benchmarks
+        run: |
+          set -e
+          TEST_DIR=/home/quliu/buddy-complier-workspace/buddy-benchmark/test_result
+          if [ -d "$TEST_DIR" ] && [ "$(find "$TEST_DIR" -type f | wc -l)" -gt 0 ]; then
+            echo "[Skip] $TEST_DIR already has benchmark outputs; skipping build/run."
+            echo "[Info] File count: $(find "$TEST_DIR" -type f | wc -l)"
+          else
+            echo "[Run] No existing results detected; running benchmarks in Docker."
+            /home/quliu/buddy-complier-workspace/run_docker.sh
+          fi
+
+      # ------------------------------------------------------------
+      # 2½) decide which date folder we’re about to publish
+      # ------------------------------------------------------------
+      - name: Set BENCH_DATE and BENCH_DIR
+        env:
+          # Prefer a passed upstream SHA, otherwise use this workflow's SHA.
+          # Handed over via env so the dispatch input is never spliced into
+          # the script text.
+          RUN_SHA: ${{ github.event.inputs.upstream_sha || github.sha }}
+        run: |
+          set -e
+          # The value becomes a directory name and a GITHUB_ENV line, so
+          # accept hexadecimal commit IDs only.
+          case "$RUN_SHA" in
+            ''|*[!0-9a-fA-F]*)
+              echo "[Error] upstream_sha is not a hexadecimal commit SHA" >&2
+              exit 1
+              ;;
+          esac
+          bench_date=$(date +'%Y-%m-%d')
+          echo "BENCH_DATE=$bench_date" >> "$GITHUB_ENV"
+          # Same absolute root as the working-directory of the later steps.
+          echo "BENCH_DIR=/home/quliu/buddy-complier-workspace/buddy-benchmark/site/benchmarks/$bench_date/$RUN_SHA" >> "$GITHUB_ENV"
+
+      # ------------------------------------------------------------
+      # 3-5) upload raw logs, convert to HTML, deploy Pages
+      # ------------------------------------------------------------
+      - name: Upload raw logs as artifact
+        uses: actions/upload-artifact@v4
+        with:
+          name: vectorization-logs-${{ github.sha }}
+          path: /home/quliu/buddy-complier-workspace/buddy-benchmark/test_result
+          retention-days: 30
+
+      - name: Build mini-site
+        working-directory: /home/quliu/buddy-complier-workspace/buddy-benchmark
+        run: |
+          rm -rf site
+          mkdir -p "${{ env.BENCH_DIR }}"
+          python3 scripts/logs2html.py test_result "${{ env.BENCH_DIR }}/"
+
+      - name: Update benchmarks/latest redirect
+        working-directory: /home/quliu/buddy-complier-workspace/buddy-benchmark/site/benchmarks
+        run: |
+          set -e
+          # -------- pick the most recent dated folder (YYYY-MM-DD) -------------
+          latest=$(ls -1d 20*/ | sort -r | head -n1 | tr -d '/')
+          echo "[Info] newest run is: $latest"
+
+          # -------- rebuild the 'latest' folder with a meta-refresh ------------
+          rm -rf latest
+          mkdir -p latest
+          printf '%s\n' "<meta http-equiv=\"refresh\" content=\"0; url=../${latest}/\">" > latest/index.html
+          echo "[Info] benchmarks/latest now points to ../${latest}/"
+
+      # ------------------------------------------------------------
+      # 4) make /benchmarks/ point to the most recent run as well
+      # ------------------------------------------------------------
+      # (Removed) previous redirect-only index; replaced by full listing below
+
+      - name: Build landing page for this run
+        run: |
+          run_root="${{ env.BENCH_DIR }}"
+          mkdir -p "$run_root"
+
+          {
+            printf '%s\n' \
+              '---' \
+              'layout: default' \
+              'title: Benchmark run' \
+              'nav_exclude: true' \
+              '---' \
+              '' \
+              '<h1>Benchmark results</h1>' \
+              '' \
+              '<ul>' \
+              '{% for f in site.static_files %}' \
+              '  {% if f.path contains page.dir and f.name != "index.html" and f.extname == ".html" %}' \
+              '    <li><a href="{{ f.path | relative_url }}">{{ f.name }}</a></li>' \
+              '  {% endif %}' \
+              '{% endfor %}' \
+              '</ul>'
+          } > "$run_root/index.html"
+
+      # NOTE(review): "Build mini-site" wipes the local site directory, so only
+      # the runs present locally (normally just this one) get listed here, and
+      # the generated page would replace the published benchmarks/index.html.
+      - name: Build top-level benchmarks index (list all runs)
+        working-directory: /home/quliu/buddy-complier-workspace/buddy-benchmark/site
+        run: |
+          set -e
+          out=benchmarks/index.html
+          mkdir -p benchmarks
+          {
+            printf '%s\n' \
+              '---' \
+              'layout: default' \
+              'title: Benchmarks' \
+              'nav_exclude: true' \
+              '---' \
+              '<h1>Benchmark runs</h1>' \
+              '<p>Select a date and commit:</p>'
+
+            # List dates newest first
+            for d in $(ls -1d benchmarks/20*/ | sort -r); do
+              d=${d%/}
+              echo "<h2>${d#benchmarks/}</h2>"
+              echo "<ul>"
+              # List the run folders of that date in reverse lexicographic order
+              for sha in $(ls -1d "$d"/*/ 2>/dev/null | sort -r); do
+                sha=${sha%/}
+                rel=${sha#benchmarks/}
+                echo "  <li><a href=\"${rel}/\">${rel}</a></li>"
+              done
+              echo "</ul>"
+            done
+          } > "$out"
+
+      # Uploaded after both index pages exist so the artifact holds the full site.
+      - name: Upload site artifact
+        uses: actions/upload-pages-artifact@v3
+        with:
+          path: /home/quliu/buddy-complier-workspace/buddy-benchmark/site
+
+      - name: Push benchmark results
+        uses: peaceiris/actions-gh-pages@v4
+        with:
+          personal_token: ${{ secrets.BUDDY_SITE_PAT }}
+          external_repository: buddy-compiler/buddy-compiler.github.io
+          publish_dir: /home/quliu/buddy-complier-workspace/buddy-benchmark/site  # <- root of generated site
+          publish_branch: master
+          keep_files: true  # keep earlier runs
+          enable_jekyll: true
diff --git a/.github/workflows/watch-upstream.yml b/.github/workflows/watch-upstream.yml
new file mode 100644
index 00000000..49a8c56d
--- /dev/null
+++ b/.github/workflows/watch-upstream.yml
@@ -0,0 +1,111 @@
+# Polls the buddy-mlir and buddy-benchmark upstream heads and dispatches
+# bench.yml when either differs from the SHAs recorded on the
+# automation-state branch.
+name: Watch upstream and trigger bench on change
+on:
+  schedule:
+    - cron: "*/10 * * * *"  # every 10 minutes
+  workflow_dispatch:
+
+permissions:
+  contents: write  # to push state branch
+  actions: write  # to dispatch workflows
+
+concurrency:
+  group: watch-upstream
+  cancel-in-progress: true
+
+jobs:
+  watch:
+    runs-on: self-hosted
+    steps:
+      - name: Checkout default branch
+        uses: actions/checkout@v4
+        with:
+          ref: main  # adjust if your default branch differs
+          fetch-depth: 0
+
+      - name: Create/switch to automation-state branch
+        run: |
+          set -e
+          git fetch origin automation-state || true
+          if git show-ref --verify --quiet refs/remotes/origin/automation-state; then
+            # Start from the remote state branch to avoid non-ff push
+            git checkout -B automation-state origin/automation-state
+          else
+            # First run: create a new state branch from current HEAD
+            git checkout -B automation-state
+          fi
+
+      - name: Get upstream HEAD SHAs
+        id: head
+        run: |
+          set -e
+          MLIR_SHA=$(git ls-remote https://github.com/buddy-compiler/buddy-mlir.git refs/heads/main | awk '{print $1}')
+          BENCH_UP_SHA=$(git ls-remote https://github.com/buddy-compiler/buddy-benchmark.git refs/heads/main | awk '{print $1}')
+          # The pipe into awk hides a failing ls-remote from `set -e`; an empty
+          # SHA would look like an upstream change and be stored as new state.
+          if [ -z "$MLIR_SHA" ] || [ -z "$BENCH_UP_SHA" ]; then
+            echo "[Error] could not resolve an upstream HEAD SHA" >&2
+            exit 1
+          fi
+          echo "mlir=${MLIR_SHA}" >> "$GITHUB_OUTPUT"
+          echo "bench=${BENCH_UP_SHA}" >> "$GITHUB_OUTPUT"
+
+      - name: Load previous SHAs
+        id: prev
+        run: |
+          echo "prev_mlir=$(cat mlir.sha 2>/dev/null || echo none)" >> "$GITHUB_OUTPUT"
+          echo "prev_bench=$(cat bench.sha 2>/dev/null || echo none)" >> "$GITHUB_OUTPUT"
+
+      - name: Decide if changed
+        id: decide
+        run: |
+          changed=false
+          if [ "${{ steps.head.outputs.mlir }}" != "${{ steps.prev.outputs.prev_mlir }}" ] || \
+             [ "${{ steps.head.outputs.bench }}" != "${{ steps.prev.outputs.prev_bench }}" ]; then
+            changed=true
+          fi
+          echo "changed=$changed" >> "$GITHUB_OUTPUT"
+
+      # When both repos moved, only "mlir" and its SHA are reported.
+      - name: Determine which repo changed
+        if: steps.decide.outputs.changed == 'true'
+        id: which
+        run: |
+          if [ "${{ steps.head.outputs.mlir }}" != "${{ steps.prev.outputs.prev_mlir }}" ]; then
+            echo "repo=mlir" >> "$GITHUB_OUTPUT"
+            echo "sha=${{ steps.head.outputs.mlir }}" >> "$GITHUB_OUTPUT"
+          else
+            echo "repo=benchmark" >> "$GITHUB_OUTPUT"
+            echo "sha=${{ steps.head.outputs.bench }}" >> "$GITHUB_OUTPUT"
+          fi
+
+      # Dispatch before recording the new SHAs: a rejected request now fails
+      # the step (--fail) with the state untouched, so the next poll retries.
+      - name: Dispatch bench.yml in this repo
+        if: steps.decide.outputs.changed == 'true'
+        run: |
+          curl -sS --fail -X POST \
+            -H "Accept: application/vnd.github+json" \
+            -H "Authorization: Bearer ${{ secrets.GITHUB_TOKEN }}" \
+            https://api.github.com/repos/${{ github.repository }}/actions/workflows/bench.yml/dispatches \
+            -d '{"ref":"main","inputs":{"upstream_repo":"${{ steps.which.outputs.repo }}","upstream_sha":"${{ steps.which.outputs.sha }}"}}'
+
+      - name: Update state branch
+        if: steps.decide.outputs.changed == 'true'
+        run: |
+          set -e
+          printf "%s" "${{ steps.head.outputs.mlir }}" > mlir.sha
+          printf "%s" "${{ steps.head.outputs.bench }}" > bench.sha
+          git config user.name "github-actions[bot]"
+          git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
+          git add mlir.sha bench.sha
+          git commit -m "state: mlir=${{ steps.head.outputs.mlir }} bench=${{ steps.head.outputs.bench }}" || echo "no changes"
+          # Re-sync and push with lease to avoid non-ff failures if remote advanced.
+          # The fetch is expected to fail on the first run (branch not created yet).
+          if git fetch origin automation-state; then
+            # Never push from a half-finished rebase: abort it and fail instead.
+            git rebase origin/automation-state || { git rebase --abort; exit 1; }
+          fi
+          git push --force-with-lease=refs/heads/automation-state origin HEAD:automation-state
diff --git a/.gitignore b/.gitignore
old mode 100644
new mode 100755
index bb52eca8..ef5a5a62
--- a/.gitignore
+++ b/.gitignore
@@ -3,3 +3,14 @@
# vscode configurations
/.vscode
+
+# Third-party checkouts and external repos
+/thirdparty/
+
+# Generated sites and results
+/site/
+/test_result/
+
+# Local Python/venv stuff
+__pycache__/
+.venv/
diff --git a/.gitmodules b/.gitmodules
old mode 100644
new mode 100755
index 2c8ef1d1..7585fff9
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,12 +1,16 @@
[submodule "thirdparty/opencv"]
path = thirdparty/opencv
url = https://github.com/opencv/opencv.git
+ ignore = all
[submodule "thirdparty/Halide"]
path = thirdparty/Halide
url = https://github.com/halide/Halide.git
+ ignore = all
[submodule "thirdparty/eigen"]
path = thirdparty/eigen
url = https://gitlab.com/libeigen/eigen.git
+ ignore = all
[submodule "thirdparty/kfr"]
path = thirdparty/kfr
url = https://github.com/kfrlib/kfr.git
+ ignore = all
diff --git a/.style.yapf b/.style.yapf
old mode 100644
new mode 100755
diff --git a/CMakeLists.txt b/CMakeLists.txt
old mode 100644
new mode 100755
diff --git a/LICENSE b/LICENSE
old mode 100644
new mode 100755
diff --git a/README.md b/README.md
old mode 100644
new mode 100755
diff --git a/benchmarks/AudioProcessing/Audios/NASA_Mars.wav b/benchmarks/AudioProcessing/Audios/NASA_Mars.wav
old mode 100644
new mode 100755
diff --git a/benchmarks/AudioProcessing/CMakeLists.txt b/benchmarks/AudioProcessing/CMakeLists.txt
old mode 100644
new mode 100755
diff --git a/benchmarks/AudioProcessing/Operations/BiquadOp/CMakeLists.txt b/benchmarks/AudioProcessing/Operations/BiquadOp/CMakeLists.txt
old mode 100644
new mode 100755
diff --git a/benchmarks/AudioProcessing/Operations/BiquadOp/MLIRBiquad.mlir b/benchmarks/AudioProcessing/Operations/BiquadOp/MLIRBiquad.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/AudioProcessing/Operations/BiquadOp/Main.cpp b/benchmarks/AudioProcessing/Operations/BiquadOp/Main.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/AudioProcessing/Operations/BiquadOp/Utils.hpp b/benchmarks/AudioProcessing/Operations/BiquadOp/Utils.hpp
old mode 100644
new mode 100755
diff --git a/benchmarks/AudioProcessing/Operations/CMakeLists.txt b/benchmarks/AudioProcessing/Operations/CMakeLists.txt
old mode 100644
new mode 100755
diff --git a/benchmarks/AudioProcessing/Operations/FFTOp/CMakeLists.txt b/benchmarks/AudioProcessing/Operations/FFTOp/CMakeLists.txt
old mode 100644
new mode 100755
diff --git a/benchmarks/AudioProcessing/Operations/FFTOp/KFRFft.cpp b/benchmarks/AudioProcessing/Operations/FFTOp/KFRFft.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/AudioProcessing/Operations/FFTOp/Main.cpp b/benchmarks/AudioProcessing/Operations/FFTOp/Main.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/AudioProcessing/Operations/FIROp/CMakeLists.txt b/benchmarks/AudioProcessing/Operations/FIROp/CMakeLists.txt
old mode 100644
new mode 100755
diff --git a/benchmarks/AudioProcessing/Operations/FIROp/MLIRFIR.mlir b/benchmarks/AudioProcessing/Operations/FIROp/MLIRFIR.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/AudioProcessing/Operations/FIROp/MLIRFIRTiledVectorization.mlir b/benchmarks/AudioProcessing/Operations/FIROp/MLIRFIRTiledVectorization.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/AudioProcessing/Operations/FIROp/MLIRFIRVectorization.mlir b/benchmarks/AudioProcessing/Operations/FIROp/MLIRFIRVectorization.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/AudioProcessing/Operations/FIROp/Main.cpp b/benchmarks/AudioProcessing/Operations/FIROp/Main.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/AudioProcessing/Operations/FIROp/Utils.hpp b/benchmarks/AudioProcessing/Operations/FIROp/Utils.hpp
old mode 100644
new mode 100755
diff --git a/benchmarks/AudioProcessing/Operations/IIROp/CMakeLists.txt b/benchmarks/AudioProcessing/Operations/IIROp/CMakeLists.txt
old mode 100644
new mode 100755
diff --git a/benchmarks/AudioProcessing/Operations/IIROp/MLIRIIRScalar.mlir b/benchmarks/AudioProcessing/Operations/IIROp/MLIRIIRScalar.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/AudioProcessing/Operations/IIROp/MLIRIIRVectorization.mlir b/benchmarks/AudioProcessing/Operations/IIROp/MLIRIIRVectorization.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/AudioProcessing/Operations/IIROp/Main.cpp b/benchmarks/AudioProcessing/Operations/IIROp/Main.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/AudioProcessing/Operations/IIROp/Utils.hpp b/benchmarks/AudioProcessing/Operations/IIROp/Utils.hpp
old mode 100644
new mode 100755
diff --git a/benchmarks/AudioProcessing/Operations/RFFTOp/CMakeLists.txt b/benchmarks/AudioProcessing/Operations/RFFTOp/CMakeLists.txt
old mode 100644
new mode 100755
diff --git a/benchmarks/AudioProcessing/Operations/RFFTOp/GoogleBenchmarkMain.cpp b/benchmarks/AudioProcessing/Operations/RFFTOp/GoogleBenchmarkMain.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/AudioProcessing/Operations/RFFTOp/RFFT.py b/benchmarks/AudioProcessing/Operations/RFFTOp/RFFT.py
old mode 100644
new mode 100755
diff --git a/benchmarks/AudioProcessing/README.md b/benchmarks/AudioProcessing/README.md
old mode 100644
new mode 100755
diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/CMakeLists.txt b/benchmarks/DeepLearning/CMakeLists.txt
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Layers/CMakeLists.txt b/benchmarks/DeepLearning/Layers/CMakeLists.txt
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Layers/FFN/.gitignore b/benchmarks/DeepLearning/Layers/FFN/.gitignore
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Layers/FFN/CMakeLists.txt b/benchmarks/DeepLearning/Layers/FFN/CMakeLists.txt
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Layers/FFN/GoogleBenchmarkMain.cpp b/benchmarks/DeepLearning/Layers/FFN/GoogleBenchmarkMain.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Layers/FFN/buddy_ffn_import.py b/benchmarks/DeepLearning/Layers/FFN/buddy_ffn_import.py
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Layers/RMSNorm/.gitignore b/benchmarks/DeepLearning/Layers/RMSNorm/.gitignore
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Layers/RMSNorm/CMakeLists.txt b/benchmarks/DeepLearning/Layers/RMSNorm/CMakeLists.txt
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Layers/RMSNorm/GoogleBenchmarkMain.cpp b/benchmarks/DeepLearning/Layers/RMSNorm/GoogleBenchmarkMain.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Layers/RMSNorm/buddy_rmsnorm_import.py b/benchmarks/DeepLearning/Layers/RMSNorm/buddy_rmsnorm_import.py
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Layers/SelfAttention/.gitignore b/benchmarks/DeepLearning/Layers/SelfAttention/.gitignore
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Layers/SelfAttention/CMakeLists.txt b/benchmarks/DeepLearning/Layers/SelfAttention/CMakeLists.txt
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Layers/SelfAttention/GoogleBenchmarkMain.cpp b/benchmarks/DeepLearning/Layers/SelfAttention/GoogleBenchmarkMain.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Layers/SelfAttention/buddy_selfattention_import.py b/benchmarks/DeepLearning/Layers/SelfAttention/buddy_selfattention_import.py
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Models/Bert/.gitignore b/benchmarks/DeepLearning/Models/Bert/.gitignore
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Models/Bert/CMakeLists.txt b/benchmarks/DeepLearning/Models/Bert/CMakeLists.txt
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Models/Bert/GoogleBenchmarkMain.cpp b/benchmarks/DeepLearning/Models/Bert/GoogleBenchmarkMain.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Models/Bert/buddy_bert_import.py b/benchmarks/DeepLearning/Models/Bert/buddy_bert_import.py
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Models/CMakeLists.txt b/benchmarks/DeepLearning/Models/CMakeLists.txt
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Models/LeNet/.gitignore b/benchmarks/DeepLearning/Models/LeNet/.gitignore
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Models/LeNet/CMakeLists.txt b/benchmarks/DeepLearning/Models/LeNet/CMakeLists.txt
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Models/LeNet/GoogleBenchmarkMain.cpp b/benchmarks/DeepLearning/Models/LeNet/GoogleBenchmarkMain.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Models/LeNet/buddy_lenet_import.py b/benchmarks/DeepLearning/Models/LeNet/buddy_lenet_import.py
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Models/LeNet/lenet_model.pth b/benchmarks/DeepLearning/Models/LeNet/lenet_model.pth
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Models/LeNet/model.py b/benchmarks/DeepLearning/Models/LeNet/model.py
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Models/MobileNet-V3/.gitignore b/benchmarks/DeepLearning/Models/MobileNet-V3/.gitignore
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Models/MobileNet-V3/CMakeLists.txt b/benchmarks/DeepLearning/Models/MobileNet-V3/CMakeLists.txt
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Models/MobileNet-V3/GoogleBenchmarkMain.cpp b/benchmarks/DeepLearning/Models/MobileNet-V3/GoogleBenchmarkMain.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Models/MobileNet-V3/buddy_mobilenetv3_import.py b/benchmarks/DeepLearning/Models/MobileNet-V3/buddy_mobilenetv3_import.py
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Models/Resnet18/.gitignore b/benchmarks/DeepLearning/Models/Resnet18/.gitignore
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Models/Resnet18/CMakeLists.txt b/benchmarks/DeepLearning/Models/Resnet18/CMakeLists.txt
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Models/Resnet18/GoogleBenchmarkMain.cpp b/benchmarks/DeepLearning/Models/Resnet18/GoogleBenchmarkMain.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Models/Resnet18/buddy_resnet18_import.py b/benchmarks/DeepLearning/Models/Resnet18/buddy_resnet18_import.py
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Models/TinyLlama/.gitignore b/benchmarks/DeepLearning/Models/TinyLlama/.gitignore
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Models/TinyLlama/CMakeLists.txt b/benchmarks/DeepLearning/Models/TinyLlama/CMakeLists.txt
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Models/TinyLlama/Main.cpp b/benchmarks/DeepLearning/Models/TinyLlama/Main.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Models/TinyLlama/Utils.hpp b/benchmarks/DeepLearning/Models/TinyLlama/Utils.hpp
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Models/TinyLlama/buddy_tinyllama_import.py b/benchmarks/DeepLearning/Models/TinyLlama/buddy_tinyllama_import.py
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Models/Whisper/.gitignore b/benchmarks/DeepLearning/Models/Whisper/.gitignore
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Models/Whisper/CMakeLists.txt b/benchmarks/DeepLearning/Models/Whisper/CMakeLists.txt
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Models/Whisper/GoogleBenchmarkMain.cpp b/benchmarks/DeepLearning/Models/Whisper/GoogleBenchmarkMain.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Models/Whisper/buddy_whisper_import.py b/benchmarks/DeepLearning/Models/Whisper/buddy_whisper_import.py
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Ops/ArithAddfOp/ArithAddf.mlir b/benchmarks/DeepLearning/Ops/ArithAddfOp/ArithAddf.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Ops/ArithAddfOp/CMakeLists.txt b/benchmarks/DeepLearning/Ops/ArithAddfOp/CMakeLists.txt
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Ops/ArithAddfOp/GoogleBenchmarkMain.cpp b/benchmarks/DeepLearning/Ops/ArithAddfOp/GoogleBenchmarkMain.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Ops/ArithDivfOp/ArithDivf.mlir b/benchmarks/DeepLearning/Ops/ArithDivfOp/ArithDivf.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Ops/ArithDivfOp/CMakeLists.txt b/benchmarks/DeepLearning/Ops/ArithDivfOp/CMakeLists.txt
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Ops/ArithDivfOp/GoogleBenchmarkMain.cpp b/benchmarks/DeepLearning/Ops/ArithDivfOp/GoogleBenchmarkMain.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Ops/ArithMulfOp/ArithMulf.mlir b/benchmarks/DeepLearning/Ops/ArithMulfOp/ArithMulf.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Ops/ArithMulfOp/CMakeLists.txt b/benchmarks/DeepLearning/Ops/ArithMulfOp/CMakeLists.txt
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Ops/ArithMulfOp/GoogleBenchmarkMain.cpp b/benchmarks/DeepLearning/Ops/ArithMulfOp/GoogleBenchmarkMain.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Ops/ArithNegfOp/ArithNegf.mlir b/benchmarks/DeepLearning/Ops/ArithNegfOp/ArithNegf.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Ops/ArithNegfOp/CMakeLists.txt b/benchmarks/DeepLearning/Ops/ArithNegfOp/CMakeLists.txt
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Ops/ArithNegfOp/GoogleBenchmarkMain.cpp b/benchmarks/DeepLearning/Ops/ArithNegfOp/GoogleBenchmarkMain.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Ops/ArithSubfOp/ArithSubf.mlir b/benchmarks/DeepLearning/Ops/ArithSubfOp/ArithSubf.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Ops/ArithSubfOp/CMakeLists.txt b/benchmarks/DeepLearning/Ops/ArithSubfOp/CMakeLists.txt
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Ops/ArithSubfOp/GoogleBenchmarkMain.cpp b/benchmarks/DeepLearning/Ops/ArithSubfOp/GoogleBenchmarkMain.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Ops/BatchMatMulOp/BatchMatMul.mlir b/benchmarks/DeepLearning/Ops/BatchMatMulOp/BatchMatMul.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Ops/BatchMatMulOp/BatchMatMulBroadcast.mlir b/benchmarks/DeepLearning/Ops/BatchMatMulOp/BatchMatMulBroadcast.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Ops/BatchMatMulOp/BatchMatMulSCF.mlir b/benchmarks/DeepLearning/Ops/BatchMatMulOp/BatchMatMulSCF.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Ops/BatchMatMulOp/BatchMatMulVec.mlir b/benchmarks/DeepLearning/Ops/BatchMatMulOp/BatchMatMulVec.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Ops/BatchMatMulOp/BatchMatMulVecTile.mlir b/benchmarks/DeepLearning/Ops/BatchMatMulOp/BatchMatMulVecTile.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Ops/BatchMatMulOp/CMakeLists.txt b/benchmarks/DeepLearning/Ops/BatchMatMulOp/CMakeLists.txt
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Ops/BatchMatMulOp/Main.cpp b/benchmarks/DeepLearning/Ops/BatchMatMulOp/Main.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Ops/BatchMatMulOp/Utils.hpp b/benchmarks/DeepLearning/Ops/BatchMatMulOp/Utils.hpp
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Ops/CMakeLists.txt b/benchmarks/DeepLearning/Ops/CMakeLists.txt
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Ops/Conv2DNchwFchwOp/CMakeLists.txt b/benchmarks/DeepLearning/Ops/Conv2DNchwFchwOp/CMakeLists.txt
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Ops/Conv2DNchwFchwOp/Conv2DNchwFchw.mlir b/benchmarks/DeepLearning/Ops/Conv2DNchwFchwOp/Conv2DNchwFchw.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Ops/Conv2DNchwFchwOp/GoogleBenchmarkMain.cpp b/benchmarks/DeepLearning/Ops/Conv2DNchwFchwOp/GoogleBenchmarkMain.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Ops/Conv2DNchwFchwOp/conv2d-nchw-fchw-im2col.mlir b/benchmarks/DeepLearning/Ops/Conv2DNchwFchwOp/conv2d-nchw-fchw-im2col.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcInt32Op/CMakeLists.txt b/benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcInt32Op/CMakeLists.txt
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcInt32Op/Conv2DNhwcFhwc.mlir b/benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcInt32Op/Conv2DNhwcFhwc.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcInt32Op/Conv2DNhwcFhwcVec.mlir b/benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcInt32Op/Conv2DNhwcFhwcVec.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcInt32Op/Conv2DNhwcFhwcVecRVV.mlir b/benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcInt32Op/Conv2DNhwcFhwcVecRVV.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcInt32Op/Main.cpp b/benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcInt32Op/Main.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcInt32Op/Utils.hpp b/benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcInt32Op/Utils.hpp
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcOp/CMakeLists.txt b/benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcOp/CMakeLists.txt
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcOp/Conv2DNhwcFhwc.mlir b/benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcOp/Conv2DNhwcFhwc.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcOp/Conv2DNhwcFhwcVec.mlir b/benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcOp/Conv2DNhwcFhwcVec.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcOp/Conv2DNhwcFhwcVecTile.mlir b/benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcOp/Conv2DNhwcFhwcVecTile.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcOp/Main.cpp b/benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcOp/Main.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcOp/Utils.hpp b/benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcOp/Utils.hpp
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Ops/Conv2DNhwcHwcfOp/CMakeLists.txt b/benchmarks/DeepLearning/Ops/Conv2DNhwcHwcfOp/CMakeLists.txt
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Ops/Conv2DNhwcHwcfOp/Conv2DNhwcHwcf.mlir b/benchmarks/DeepLearning/Ops/Conv2DNhwcHwcfOp/Conv2DNhwcHwcf.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Ops/Conv2DNhwcHwcfOp/GoogleBenchmarkMain.cpp b/benchmarks/DeepLearning/Ops/Conv2DNhwcHwcfOp/GoogleBenchmarkMain.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Ops/DepthwiseConv2DNhwcHwcOp/CMakeLists.txt b/benchmarks/DeepLearning/Ops/DepthwiseConv2DNhwcHwcOp/CMakeLists.txt
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Ops/DepthwiseConv2DNhwcHwcOp/DepthwiseConv2DNhwcHwc.mlir b/benchmarks/DeepLearning/Ops/DepthwiseConv2DNhwcHwcOp/DepthwiseConv2DNhwcHwc.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Ops/DepthwiseConv2DNhwcHwcOp/DepthwiseConv2DNhwcHwcVec.mlir b/benchmarks/DeepLearning/Ops/DepthwiseConv2DNhwcHwcOp/DepthwiseConv2DNhwcHwcVec.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Ops/DepthwiseConv2DNhwcHwcOp/Main.cpp b/benchmarks/DeepLearning/Ops/DepthwiseConv2DNhwcHwcOp/Main.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Ops/DepthwiseConv2DNhwcHwcOp/Utils.hpp b/benchmarks/DeepLearning/Ops/DepthwiseConv2DNhwcHwcOp/Utils.hpp
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Ops/MatMulOp/CMakeLists.txt b/benchmarks/DeepLearning/Ops/MatMulOp/CMakeLists.txt
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Ops/MatMulOp/Main.cpp b/benchmarks/DeepLearning/Ops/MatMulOp/Main.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Ops/MatMulOp/Utils.hpp b/benchmarks/DeepLearning/Ops/MatMulOp/Utils.hpp
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Ops/MatMulOp/matmul.mlir b/benchmarks/DeepLearning/Ops/MatMulOp/matmul.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Ops/MatMulTransposeBOp/CMakeLists.txt b/benchmarks/DeepLearning/Ops/MatMulTransposeBOp/CMakeLists.txt
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Ops/MatMulTransposeBOp/Main.cpp b/benchmarks/DeepLearning/Ops/MatMulTransposeBOp/Main.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Ops/MatMulTransposeBOp/MatMulTransposeB.mlir b/benchmarks/DeepLearning/Ops/MatMulTransposeBOp/MatMulTransposeB.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Ops/MatMulTransposeBOp/Utils.hpp b/benchmarks/DeepLearning/Ops/MatMulTransposeBOp/Utils.hpp
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Ops/MathExpOp/CMakeLists.txt b/benchmarks/DeepLearning/Ops/MathExpOp/CMakeLists.txt
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Ops/MathExpOp/GoogleBenchmarkMain.cpp b/benchmarks/DeepLearning/Ops/MathExpOp/GoogleBenchmarkMain.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Ops/MathExpOp/MathExp.mlir b/benchmarks/DeepLearning/Ops/MathExpOp/MathExp.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Ops/MathFpowOp/CMakeLists.txt b/benchmarks/DeepLearning/Ops/MathFpowOp/CMakeLists.txt
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Ops/MathFpowOp/GoogleBenchmarkMain.cpp b/benchmarks/DeepLearning/Ops/MathFpowOp/GoogleBenchmarkMain.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Ops/MathFpowOp/MathFpow.mlir b/benchmarks/DeepLearning/Ops/MathFpowOp/MathFpow.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Ops/MathRsqrtOp/CMakeLists.txt b/benchmarks/DeepLearning/Ops/MathRsqrtOp/CMakeLists.txt
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Ops/MathRsqrtOp/GoogleBenchmarkMain.cpp b/benchmarks/DeepLearning/Ops/MathRsqrtOp/GoogleBenchmarkMain.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Ops/MathRsqrtOp/MathRsqrt.mlir b/benchmarks/DeepLearning/Ops/MathRsqrtOp/MathRsqrt.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Ops/PoolingNhwcSumOp/CMakeLists.txt b/benchmarks/DeepLearning/Ops/PoolingNhwcSumOp/CMakeLists.txt
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Ops/PoolingNhwcSumOp/GoogleBenchmarkMain.cpp b/benchmarks/DeepLearning/Ops/PoolingNhwcSumOp/GoogleBenchmarkMain.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Ops/PoolingNhwcSumOp/PoolingNhwcSum.mlir b/benchmarks/DeepLearning/Ops/PoolingNhwcSumOp/PoolingNhwcSum.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Ops/ReduceAddfOp/CMakeLists.txt b/benchmarks/DeepLearning/Ops/ReduceAddfOp/CMakeLists.txt
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Ops/ReduceAddfOp/GoogleBenchmarkMain.cpp b/benchmarks/DeepLearning/Ops/ReduceAddfOp/GoogleBenchmarkMain.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Ops/ReduceAddfOp/ReduceAddf.mlir b/benchmarks/DeepLearning/Ops/ReduceAddfOp/ReduceAddf.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Ops/ReduceMaxfOp/CMakeLists.txt b/benchmarks/DeepLearning/Ops/ReduceMaxfOp/CMakeLists.txt
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Ops/ReduceMaxfOp/GoogleBenchmarkMain.cpp b/benchmarks/DeepLearning/Ops/ReduceMaxfOp/GoogleBenchmarkMain.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Ops/ReduceMaxfOp/ReduceMaxf.mlir b/benchmarks/DeepLearning/Ops/ReduceMaxfOp/ReduceMaxf.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Ops/SoftmaxExpSumDivOp/CMakeLists.txt b/benchmarks/DeepLearning/Ops/SoftmaxExpSumDivOp/CMakeLists.txt
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Ops/SoftmaxExpSumDivOp/GoogleBenchmarkMain.cpp b/benchmarks/DeepLearning/Ops/SoftmaxExpSumDivOp/GoogleBenchmarkMain.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Ops/SoftmaxExpSumDivOp/SoftmaxExpSumDiv.mlir b/benchmarks/DeepLearning/Ops/SoftmaxExpSumDivOp/SoftmaxExpSumDiv.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Ops/TransposeOp/CMakeLists.txt b/benchmarks/DeepLearning/Ops/TransposeOp/CMakeLists.txt
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Ops/TransposeOp/Main.cpp b/benchmarks/DeepLearning/Ops/TransposeOp/Main.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Ops/TransposeOp/Transpose2D.mlir b/benchmarks/DeepLearning/Ops/TransposeOp/Transpose2D.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/Ops/TransposeOp/Utils.hpp b/benchmarks/DeepLearning/Ops/TransposeOp/Utils.hpp
old mode 100644
new mode 100755
diff --git a/benchmarks/DeepLearning/README.md b/benchmarks/DeepLearning/README.md
old mode 100644
new mode 100755
diff --git a/benchmarks/Gemmini/CMakeLists.txt b/benchmarks/Gemmini/CMakeLists.txt
old mode 100644
new mode 100755
diff --git a/benchmarks/Gemmini/Ops/CMakeLists.txt b/benchmarks/Gemmini/Ops/CMakeLists.txt
old mode 100644
new mode 100755
diff --git a/benchmarks/Gemmini/Ops/MatMulOp/CMakeLists.txt b/benchmarks/Gemmini/Ops/MatMulOp/CMakeLists.txt
old mode 100644
new mode 100755
diff --git a/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c b/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c
old mode 100644
new mode 100755
diff --git a/benchmarks/Gemmini/Ops/MatMulOp/ExoUtils.h b/benchmarks/Gemmini/Ops/MatMulOp/ExoUtils.h
old mode 100644
new mode 100755
diff --git a/benchmarks/Gemmini/Ops/MatMulOp/Main.cpp b/benchmarks/Gemmini/Ops/MatMulOp/Main.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/Gemmini/Ops/MatMulOp/matmul.mlir b/benchmarks/Gemmini/Ops/MatMulOp/matmul.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/Gemmini/README.md b/benchmarks/Gemmini/README.md
old mode 100644
new mode 100755
diff --git a/benchmarks/Gemmini/ResNet-101/.gitattributes b/benchmarks/Gemmini/ResNet-101/.gitattributes
old mode 100644
new mode 100755
diff --git a/benchmarks/Gemmini/ResNet-101/CMakeLists.txt b/benchmarks/Gemmini/ResNet-101/CMakeLists.txt
old mode 100644
new mode 100755
diff --git a/benchmarks/Gemmini/ResNet-101/CRunnerUtils.cpp b/benchmarks/Gemmini/ResNet-101/CRunnerUtils.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/Gemmini/ResNet-101/Main.cpp b/benchmarks/Gemmini/ResNet-101/Main.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/Gemmini/ResNet-101/ResNet101.mlir b/benchmarks/Gemmini/ResNet-101/ResNet101.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/Gemmini/ResNet-101/images/Cat.h b/benchmarks/Gemmini/ResNet-101/images/Cat.h
old mode 100644
new mode 100755
diff --git a/benchmarks/Gemmini/ResNet-101/images/Cat.jpg b/benchmarks/Gemmini/ResNet-101/images/Cat.jpg
old mode 100644
new mode 100755
diff --git a/benchmarks/Gemmini/ResNet-101/include/Labels.h b/benchmarks/Gemmini/ResNet-101/include/Labels.h
old mode 100644
new mode 100755
diff --git a/benchmarks/Gemmini/Utils.h b/benchmarks/Gemmini/Utils.h
old mode 100644
new mode 100755
diff --git a/benchmarks/ImageProcessing/BuddyConv2DBenchmark.cpp b/benchmarks/ImageProcessing/BuddyConv2DBenchmark.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/ImageProcessing/BuddyCorr2DBenchmark.cpp b/benchmarks/ImageProcessing/BuddyCorr2DBenchmark.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/ImageProcessing/BuddyMorph2DBenchmark.cpp b/benchmarks/ImageProcessing/BuddyMorph2DBenchmark.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/ImageProcessing/BuddyResize2DBenchmark.cpp b/benchmarks/ImageProcessing/BuddyResize2DBenchmark.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/ImageProcessing/CMakeLists.txt b/benchmarks/ImageProcessing/CMakeLists.txt
old mode 100644
new mode 100755
diff --git a/benchmarks/ImageProcessing/EigenConvolve2DBenchmark.cpp b/benchmarks/ImageProcessing/EigenConvolve2DBenchmark.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/ImageProcessing/Images/YuTu.png b/benchmarks/ImageProcessing/Images/YuTu.png
old mode 100644
new mode 100755
diff --git a/benchmarks/ImageProcessing/Images/YuTu1022.png b/benchmarks/ImageProcessing/Images/YuTu1022.png
old mode 100644
new mode 100755
diff --git a/benchmarks/ImageProcessing/Images/YuTu1024.png b/benchmarks/ImageProcessing/Images/YuTu1024.png
old mode 100644
new mode 100755
diff --git a/benchmarks/ImageProcessing/Images/YuTu128.png b/benchmarks/ImageProcessing/Images/YuTu128.png
old mode 100644
new mode 100755
diff --git a/benchmarks/ImageProcessing/Images/YuTu16.png b/benchmarks/ImageProcessing/Images/YuTu16.png
old mode 100644
new mode 100755
diff --git a/benchmarks/ImageProcessing/Images/YuTu18.png b/benchmarks/ImageProcessing/Images/YuTu18.png
old mode 100644
new mode 100755
diff --git a/benchmarks/ImageProcessing/Images/YuTu256.png b/benchmarks/ImageProcessing/Images/YuTu256.png
old mode 100644
new mode 100755
diff --git a/benchmarks/ImageProcessing/Images/YuTu32.png b/benchmarks/ImageProcessing/Images/YuTu32.png
old mode 100644
new mode 100755
diff --git a/benchmarks/ImageProcessing/Images/YuTu4.png b/benchmarks/ImageProcessing/Images/YuTu4.png
old mode 100644
new mode 100755
diff --git a/benchmarks/ImageProcessing/Images/YuTu512.png b/benchmarks/ImageProcessing/Images/YuTu512.png
old mode 100644
new mode 100755
diff --git a/benchmarks/ImageProcessing/Images/YuTu6.png b/benchmarks/ImageProcessing/Images/YuTu6.png
old mode 100644
new mode 100755
diff --git a/benchmarks/ImageProcessing/Images/YuTu64.png b/benchmarks/ImageProcessing/Images/YuTu64.png
old mode 100644
new mode 100755
diff --git a/benchmarks/ImageProcessing/Images/YuTu8.png b/benchmarks/ImageProcessing/Images/YuTu8.png
old mode 100644
new mode 100755
diff --git a/benchmarks/ImageProcessing/MLIRConv2D.mlir b/benchmarks/ImageProcessing/MLIRConv2D.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/ImageProcessing/MLIRConv2DBenchmark.cpp b/benchmarks/ImageProcessing/MLIRConv2DBenchmark.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/ImageProcessing/Main.cpp b/benchmarks/ImageProcessing/Main.cpp
old mode 100644
new mode 100755
index 3f2f2eeb..81b68103
--- a/benchmarks/ImageProcessing/Main.cpp
+++ b/benchmarks/ImageProcessing/Main.cpp
@@ -80,18 +80,18 @@ void registerBenchmarkOpenCVResize2D();
// Run benchmarks.
int main(int argc, char **argv) {
- if (argc != 5) {
- throw std::invalid_argument(
- "Wrong format of command line arguments.\n"
- "Correct format is ./image-processing-benchmark \n where "
- "image path provides path of the image to be processed, kernel name "
- "denotes the name "
- "of desired kernel as specified in "
- "kernelmorph denotes the kernel to be used for morphological operations"
- "include/ImageProcessing/Kernels.h and Boundary options available "
- "are CONSTANT_PADDING, REPLICATE_PADDING.\n");
- }
+ // if (argc != 5) {
+ // throw std::invalid_argument(
+ // "Wrong format of command line arguments.\n"
+ // "Correct format is ./image-processing-benchmark \n where "
+ // "image path provides path of the image to be processed, kernel name "
+ // "denotes the name "
+ // "of desired kernel as specified in "
+ // "kernelmorph denotes the kernel to be used for morphological operations"
+ // "include/ImageProcessing/Kernels.h and Boundary options available "
+ // "are CONSTANT_PADDING, REPLICATE_PADDING.\n");
+ // }
Img img = dip::imread(argv[1], dip::IMGRD_GRAYSCALE);
diff --git a/benchmarks/ImageProcessing/OpenCVFilter2DBenchmark.cpp b/benchmarks/ImageProcessing/OpenCVFilter2DBenchmark.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/ImageProcessing/OpenCVMorph2DBenchmark.cpp b/benchmarks/ImageProcessing/OpenCVMorph2DBenchmark.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/ImageProcessing/OpenCVResize2DBenchmark.cpp b/benchmarks/ImageProcessing/OpenCVResize2DBenchmark.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/ImageProcessing/include/Kernels.h b/benchmarks/ImageProcessing/include/Kernels.h
old mode 100644
new mode 100755
diff --git a/benchmarks/OpOptimization/CMakeLists.txt b/benchmarks/OpOptimization/CMakeLists.txt
old mode 100644
new mode 100755
diff --git a/benchmarks/OpOptimization/Conv2dNchwFchw/CMakeLists.txt b/benchmarks/OpOptimization/Conv2dNchwFchw/CMakeLists.txt
old mode 100644
new mode 100755
diff --git a/benchmarks/OpOptimization/Conv2dNchwFchw/Conv2DNchwFchw.mlir b/benchmarks/OpOptimization/Conv2dNchwFchw/Conv2DNchwFchw.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/OpOptimization/Conv2dNchwFchw/Conv2DNchwFchwBenchmark.cpp b/benchmarks/OpOptimization/Conv2dNchwFchw/Conv2DNchwFchwBenchmark.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/OpOptimization/Conv2dNchwFchw/Conv2DNchwFchwBroadcast.mlir b/benchmarks/OpOptimization/Conv2dNchwFchw/Conv2DNchwFchwBroadcast.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/OpOptimization/Conv2dNchwFchw/Conv2DNchwFchwIm2col.mlir b/benchmarks/OpOptimization/Conv2dNchwFchw/Conv2DNchwFchwIm2col.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/OpOptimization/Conv2dNchwFchw/Conv2DNchwFchwWinagrad.mlir b/benchmarks/OpOptimization/Conv2dNchwFchw/Conv2DNchwFchwWinagrad.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/OpOptimization/Conv2dNchwFchw/Main.cpp b/benchmarks/OpOptimization/Conv2dNchwFchw/Main.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/OpOptimization/MatMul/CMakeLists.txt b/benchmarks/OpOptimization/MatMul/CMakeLists.txt
old mode 100644
new mode 100755
diff --git a/benchmarks/OpOptimization/MatMul/Main.cpp b/benchmarks/OpOptimization/MatMul/Main.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/OpOptimization/MatMul/MatMul.mlir b/benchmarks/OpOptimization/MatMul/MatMul.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/OpOptimization/MatMul/MatMulBenchmark.cpp b/benchmarks/OpOptimization/MatMul/MatMulBenchmark.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/OpOptimization/MatMul/MatMulBroadcast.mlir b/benchmarks/OpOptimization/MatMul/MatMulBroadcast.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/OpOptimization/MatMul/MatMulTransform.mlir b/benchmarks/OpOptimization/MatMul/MatMulTransform.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/OpOptimization/MatMul/TVM/.gitignore b/benchmarks/OpOptimization/MatMul/TVM/.gitignore
old mode 100644
new mode 100755
diff --git a/benchmarks/OpOptimization/MatMul/TVM/main.py b/benchmarks/OpOptimization/MatMul/TVM/main.py
old mode 100644
new mode 100755
diff --git a/benchmarks/OpOptimization/MatMul/TVM/matmul_autotvm.py b/benchmarks/OpOptimization/MatMul/TVM/matmul_autotvm.py
old mode 100644
new mode 100755
diff --git a/benchmarks/OpOptimization/MatMul/TVM/matmul_manual.py b/benchmarks/OpOptimization/MatMul/TVM/matmul_manual.py
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/CMakeLists.txt b/benchmarks/Vectorization/CMakeLists.txt
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/MLIRMatMul.mlir b/benchmarks/Vectorization/MLIRMatMul.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/MLIRMatMulBenchmark.cpp b/benchmarks/Vectorization/MLIRMatMulBenchmark.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/MLIRMatVec.mlir b/benchmarks/Vectorization/MLIRMatVec.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/MLIRMatVecBenchmark.cpp b/benchmarks/Vectorization/MLIRMatVecBenchmark.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/Main.cpp b/benchmarks/Vectorization/Main.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/gccloops/CMakeLists.txt b/benchmarks/Vectorization/gccloops/CMakeLists.txt
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx1.mlir b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx1.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx10a.mlir b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx10a.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx10aBenchmark.cpp b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx10aBenchmark.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx10b.mlir b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx10b.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx10bBenchmark.cpp b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx10bBenchmark.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx11.mlir b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx11.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx11Benchmark.cpp b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx11Benchmark.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx12.mlir b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx12.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx12Benchmark.cpp b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx12Benchmark.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx13.mlir b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx13.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx13Benchmark.cpp b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx13Benchmark.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx14.mlir b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx14.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx14Benchmark.cpp b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx14Benchmark.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx1Benchmark.cpp b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx1Benchmark.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx21.mlir b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx21.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx21Benchmark.cpp b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx21Benchmark.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx23.mlir b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx23.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx23Benchmark.cpp b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx23Benchmark.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx24.mlir b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx24.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx24Benchmark.cpp b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx24Benchmark.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx25.mlir b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx25.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx25Benchmark.cpp b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx25Benchmark.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx2a.mlir b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx2a.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx2aBenchmark.cpp b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx2aBenchmark.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx2b.mlir b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx2b.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx2bBenchmark.cpp b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx2bBenchmark.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx3.mlir b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx3.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx3Benchmark.cpp b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx3Benchmark.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx4a.mlir b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx4a.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx4aBenchmark.cpp b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx4aBenchmark.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx4b.mlir b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx4b.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx4bBenchmark.cpp b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx4bBenchmark.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx4c.mlir b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx4c.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx4cBenchmark.cpp b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx4cBenchmark.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx7.mlir b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx7.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx7Benchmark.cpp b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx7Benchmark.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx8.mlir b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx8.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx8Benchmark.cpp b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx8Benchmark.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx9.mlir b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx9.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx9Benchmark.cpp b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx9Benchmark.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/gccloops/Main.cpp b/benchmarks/Vectorization/gccloops/Main.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/linpackc/CMakeLists.txt b/benchmarks/Vectorization/linpackc/CMakeLists.txt
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/linpackc/MLIRLinpackCDaxpyBenchmark.cpp b/benchmarks/Vectorization/linpackc/MLIRLinpackCDaxpyBenchmark.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/linpackc/MLIRLinpackCDaxpyRollF32.mlir b/benchmarks/Vectorization/linpackc/MLIRLinpackCDaxpyRollF32.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/linpackc/MLIRLinpackCDaxpyRollF64.mlir b/benchmarks/Vectorization/linpackc/MLIRLinpackCDaxpyRollF64.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/linpackc/MLIRLinpackCDaxpyUnrollF32.mlir b/benchmarks/Vectorization/linpackc/MLIRLinpackCDaxpyUnrollF32.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/linpackc/MLIRLinpackCDaxpyUnrollF64.mlir b/benchmarks/Vectorization/linpackc/MLIRLinpackCDaxpyUnrollF64.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/linpackc/Main.cpp b/benchmarks/Vectorization/linpackc/Main.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/polybench/CMakeLists.txt b/benchmarks/Vectorization/polybench/CMakeLists.txt
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/polybench/MLIRPolybench2mm.mlir b/benchmarks/Vectorization/polybench/MLIRPolybench2mm.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/polybench/MLIRPolybench2mmBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybench2mmBenchmark.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/polybench/MLIRPolybench3mm.mlir b/benchmarks/Vectorization/polybench/MLIRPolybench3mm.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/polybench/MLIRPolybench3mmBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybench3mmBenchmark.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchAdi.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchAdi.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchAdiBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchAdiBenchmark.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchAtax.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchAtax.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchAtaxBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchAtaxBenchmark.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchBicg.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchBicg.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchBicgBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchBicgBenchmark.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchCholesky.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchCholesky.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchCholeskyBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchCholeskyBenchmark.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchCorrelation.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchCorrelation.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchCorrelationBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchCorrelationBenchmark.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchCovariance.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchCovariance.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchCovarianceBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchCovarianceBenchmark.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchDeriche.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchDeriche.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchDericheBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchDericheBenchmark.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchDoitgen.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchDoitgen.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchDoitgenBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchDoitgenBenchmark.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchDurbin.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchDurbin.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchDurbinBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchDurbinBenchmark.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchFdtd2D.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchFdtd2D.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchFdtd2DBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchFdtd2DBenchmark.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchFloydWarshall.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchFloydWarshall.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchFloydWarshallBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchFloydWarshallBenchmark.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchGemm.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchGemm.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchGemmBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchGemmBenchmark.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchGemver.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchGemver.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchGemverBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchGemverBenchmark.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchGesummv.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchGesummv.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchGesummvBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchGesummvBenchmark.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchGramschmidt.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchGramschmidt.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchGramschmidtBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchGramschmidtBenchmark.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchHeat3D.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchHeat3D.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchHeat3DBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchHeat3DBenchmark.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchJacobi1D.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchJacobi1D.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchJacobi1DBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchJacobi1DBenchmark.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchJacobi2D.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchJacobi2D.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchJacobi2DBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchJacobi2DBenchmark.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchLu.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchLu.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchLuBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchLuBenchmark.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchLudcmp.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchLudcmp.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchLudcmpBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchLudcmpBenchmark.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchMvt.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchMvt.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchMvtBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchMvtBenchmark.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchNussinov.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchNussinov.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchNussinovBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchNussinovBenchmark.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchSeidel2D.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchSeidel2D.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchSeidel2DBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchSeidel2DBenchmark.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchSymm.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchSymm.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchSymmBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchSymmBenchmark.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchSyr2k.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchSyr2k.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchSyr2kBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchSyr2kBenchmark.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchSyrk.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchSyrk.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchSyrkBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchSyrkBenchmark.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchTrisolv.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchTrisolv.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchTrisolvBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchTrisolvBenchmark.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchTrmm.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchTrmm.mlir
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchTrmmBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchTrmmBenchmark.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/polybench/Main.cpp b/benchmarks/Vectorization/polybench/Main.cpp
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/polybench/README.md b/benchmarks/Vectorization/polybench/README.md
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/polybench/Utils.hpp b/benchmarks/Vectorization/polybench/Utils.hpp
old mode 100644
new mode 100755
diff --git a/benchmarks/Vectorization/polybench/polybench_mlir_gen.py b/benchmarks/Vectorization/polybench/polybench_mlir_gen.py
old mode 100644
new mode 100755
diff --git a/cmake/buddy-benchmark.cmake b/cmake/buddy-benchmark.cmake
old mode 100644
new mode 100755
diff --git a/cmake/check-simd.cmake b/cmake/check-simd.cmake
old mode 100644
new mode 100755
diff --git a/docs/ConvAlgorithms.md b/docs/ConvAlgorithms.md
old mode 100644
new mode 100755
diff --git a/docs/DeepLearningBenchmark.md b/docs/DeepLearningBenchmark.md
old mode 100644
new mode 100755
diff --git a/docs/GemminiConfig.md b/docs/GemminiConfig.md
old mode 100644
new mode 100755
diff --git a/docs/Images/CoefficientsBroadcasting.png b/docs/Images/CoefficientsBroadcasting.png
old mode 100644
new mode 100755
diff --git a/docs/PrepareRVOpenMP.md b/docs/PrepareRVOpenMP.md
old mode 100644
new mode 100755
diff --git a/requirements.txt b/requirements.txt
old mode 100644
new mode 100755
diff --git a/scripts/logs2html.py b/scripts/logs2html.py
new file mode 100755
index 00000000..a567930b
--- /dev/null
+++ b/scripts/logs2html.py
@@ -0,0 +1,100 @@
+#!/usr/bin/env python3
+"""
+Turn every *.json under into /.html.
+If a twin *.log exists (same stem), show it in a collapsible .
+If the JSON is unreadable, generate a red “FAILED” page instead of aborting.
+"""
+
+import html, json, pathlib, datetime, sys, traceback
+
+class BrokenJSON(RuntimeError):
+ pass
+
+src, dst = map(pathlib.Path, sys.argv[1:3])  # argv[1] = source tree, argv[2] = output tree
+dst.mkdir(parents=True, exist_ok=True)
+stamp = datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC")  # timezone-aware; utcnow() is deprecated
+
+CSS = """
+
+"""
+
+def gbench_json_to_table(js_path: pathlib.Path) -> str:
+ """Turn one Google-Benchmark JSON file into an HTML ."""
+ try:
+ payload = json.loads(js_path.read_text())
+ except json.JSONDecodeError as e:
+ raise BrokenJSON(f"JSON parse error: {e.msg}") from e
+
+ if "benchmarks" not in payload:
+ raise BrokenJSON("Missing top-level ‘benchmarks’ array")
+
+ data = payload["benchmarks"]
+ if not data:
+ raise BrokenJSON("Empty ‘benchmarks’ array")
+
+ first = next((b for b in data if b.get("run_type") == "iteration"), None)
+ if not first:
+ raise BrokenJSON("No ‘iteration’ rows found")
+
+ unit = html.escape(first.get("time_unit", "ns"))
+
+ head = (f"| Name | Time ({unit}) | "
+ f"CPU ({unit}) | Iterations |
")
+
+ rows = "\n".join(
+ f"| {html.escape(b['name'])} | "
+ f"{b['real_time']:.3g} | "
+ f"{b['cpu_time']:.3g} | "
+ f"{b['iterations']:,} |
"
+ for b in data
+ if b.get("run_type") == "iteration"
+ )
+ return f"{js_path.name}
\n"
+
+# ---------------------------------------------------------------------------
+
+for js in src.rglob("*.json"):
+ print("→ parsing", js)
+ log = js.with_suffix(".log")
+ rel = js.relative_to(src)
+ page = dst / rel.with_suffix(".html")
+ page.parent.mkdir(parents=True, exist_ok=True)
+
+ body = [CSS, f"{rel}
{stamp}
"]
+
+ try:
+ body.append(gbench_json_to_table(js))
+ except (BrokenJSON, json.JSONDecodeError) as err:
+ # Build a failure stub but keep the run going
+ body.append(f"⚠ FAILED: "
+ f"{html.escape(str(err))}
")
+
+ # Always embed the console log if available
+ if log.exists():
+ body.append("Console output
\n"
+ f"{html.escape(log.read_text())} ")
+
+ page.write_text("\n".join(body))
+
+# ---------------------------------------------------------------------------
+# Build a simple index in the destination root (dst)
+# ---------------------------------------------------------------------------
+links = "\n".join(
+ f''
+ f'{p.relative_to(dst).as_posix()}'
+ for p in sorted(dst.rglob("*.html"))
+ if p.name != "index.html"
+)
+
+(dst / "index.html").write_text(
+ CSS + f"Buddy-Benchmark results
"
+)
diff --git a/scripts/run_docker.sh b/scripts/run_docker.sh
new file mode 100755
index 00000000..a48e21c3
--- /dev/null
+++ b/scripts/run_docker.sh
@@ -0,0 +1,44 @@
+#!/usr/bin/env bash
+# Run the buddy-mlir build/test sequence and the vectorization benchmarks
+# inside a throw-away Docker container, then copy the benchmark results back
+# to the host.
+#
+# GITHUB_WORKSPACE must name the host directory that contains the buddy-mlir
+# and buddy-benchmark checkouts; it is bind-mounted into the container at
+# /home/buddy-complier-workspace.
+set -euo pipefail
+
+: "${GITHUB_WORKSPACE:?GITHUB_WORKSPACE must be set}"
+
+CONTAINER_NAME=buddy-mlir-ci-test
+
+# The container name is fixed, so a container left behind by an interrupted
+# run would make "docker run" fail with a name conflict. Remove it first.
+docker rm -f "${CONTAINER_NAME}" >/dev/null 2>&1 || true
+
+# ➊ one container per run, killed automatically on exit
+CID=$(docker run -d --name "${CONTAINER_NAME}" \
+  --privileged \
+  -v "${GITHUB_WORKSPACE}:/home/buddy-complier-workspace" \
+  liuqun1006/buddycompiler-base:python sleep infinity)
+
+trap 'docker rm -f "${CID}"' EXIT
+
+# ➋ execute the whole build-and-test sequence inside
+docker exec "${CID}" bash -lc '
+  set -e
+  cd /home/buddy-complier-workspace/buddy-mlir
+  ./test.sh build-llvm
+  ./test.sh build-buddy
+  ./test.sh run
+
+  cd /home/buddy-complier-workspace/buddy-benchmark/test
+  ./test_script_vectorizationprocessing.sh
+'
+
+# ➌ bring the logs back to the host (under ./test_result). The trailing "/."
+# copies the directory contents, so the layout is the same whether or not
+# ./test_result already exists on the host.
+mkdir -p ./test_result
+docker cp "${CID}":/home/buddy-complier-workspace/buddy-benchmark/test_result/. ./test_result/
+
diff --git a/test/test_script_audioprocessing.sh b/test/test_script_audioprocessing.sh
new file mode 100755
index 00000000..e16ae655
--- /dev/null
+++ b/test/test_script_audioprocessing.sh
@@ -0,0 +1,52 @@
+#!/usr/bin/env bash
+# Builds and runs the IIR audio-processing benchmark, then builds the
+# audio-plot tool and runs it on the input audio and ResultKFRIir.wav.
+
+export BUDDY_MLIR_BUILD_DIR=/home/buddy-complier-workspace/buddy-mlir/build
+export LLVM_MLIR_BUILD_DIR=/home/buddy-complier-workspace/buddy-mlir/llvm/build
+cd /home/buddy-complier-workspace/buddy-benchmark
+mkdir -p build && cd build
+cmake -G Ninja .. \
+  -DCMAKE_BUILD_TYPE=RELEASE \
+  -DAUDIO_PROCESSING_BENCHMARKS=ON \
+  -DCMAKE_CXX_COMPILER=${LLVM_MLIR_BUILD_DIR}/bin/clang++ \
+  -DKFR_DIR=/home/buddy-complier-workspace/buddy-benchmark/thirdparty/kfr \
+  -DBUDDY_MLIR_BUILD_DIR=${BUDDY_MLIR_BUILD_DIR}
+ninja dap-op-iir-benchmark
+cd bin
+./dap-op-iir-benchmark
+
+# Return from build/bin to the build directory: the configure step and the
+# ninja invocation below must run in the build tree root, not in bin/.
+cd ..
+
+cmake -G Ninja .. \
+  -DCMAKE_BUILD_TYPE=RELEASE \
+  -DAUDIO_PROCESSING_BENCHMARKS=ON \
+  -DCMAKE_CXX_COMPILER=${LLVM_MLIR_BUILD_DIR}/bin/clang++ \
+  -DKFR_DIR=/home/buddy-complier-workspace/buddy-benchmark/thirdparty/kfr \
+  -DBUDDY_MLIR_BUILD_DIR=${BUDDY_MLIR_BUILD_DIR} \
+  -DPYTHON_BINARY_DIR="$(dirname "$(which python3)")"
+
+ninja audio-plot
+cd bin
+./audio-plot ../../benchmarks/AudioProcessing/Audios/NASA_Mars.wav ResultKFRIir.wav
+# Console output captured from a run of the command above:
+# "
+# root@4f445bb41579:/home/buddy-complier-workspace/buddy-benchmark/build/bin# ./audio-plot ../../benchmarks/AudioProcessing/Audios/NASA_Mars.wav ResultKFRIir.wav
+# Plotting now...
+# Traceback (most recent call last):
+# File "/home/buddy-complier-workspace/buddy-benchmark/utils/plots/python/plot.py", line 71, in
+# compare_wave(args.file1, args.file2, part=args.part,
+# File "/home/buddy-complier-workspace/buddy-benchmark/utils/plots/python/plotools/compare.py", line 120, in compare_wave
+# after, time2 = get_time_domain(file2)
+# File "/home/buddy-complier-workspace/buddy-benchmark/utils/plots/python/plotools/compare.py", line 60, in get_time_domain
+# info, samples = get_info_and_samples(file)
+# File "/home/buddy-complier-workspace/buddy-benchmark/utils/plots/python/plotools/compare.py", line 38, in get_info_and_samples
+# with wave.open(file, 'rb') as audio:
+# File "/usr/lib/python3.10/wave.py", line 509, in open
+# return Wave_read(f)
+# File "/usr/lib/python3.10/wave.py", line 159, in __init__
+# f = builtins.open(f, 'rb')
+# FileNotFoundError: [Errno 2] No such file or directory: 'ResultKFRIir.wav'
+# "
\ No newline at end of file
diff --git a/test/test_script_deeplearning.sh b/test/test_script_deeplearning.sh
new file mode 100755
index 00000000..eef45e08
--- /dev/null
+++ b/test/test_script_deeplearning.sh
@@ -0,0 +1,215 @@
+#!/usr/bin/env bash
+
+################################################################################
+# 0. Script Setup
+################################################################################
+# We disable "exit on error" so that if one benchmark fails to build or run,
+# we can continue with the rest.
+set +e
+
+################################################################################
+# 1. (Optional) Activate Python/Conda Environment
+################################################################################
+# Uncomment or adjust if you use Anaconda/Miniconda:
+# conda activate
+
+
+################################################################################
+# 2. Benchmark Targets (each one is built and run independently below)
+################################################################################
+BENCHMARK_TARGETS=(
+  # ------------------
+  # Model-Level
+  # ------------------
+  "dl-model-tinyllama-benchmark"
+  "dl-model-mobilenetv3-benchmark"
+  "dl-model-lenet-benchmark"
+  "dl-model-bert-benchmark"
+  "dl-model-whisper-benchmark"
+  "dl-model-resnet18-benchmark"
+
+  # ------------------
+  # Layer-Level
+  # ------------------
+  "dl-layer-ffn-benchmark"
+  "dl-layer-selfattention-benchmark"
+  "dl-layer-rmsnorm-benchmark"
+
+  # ------------------
+  # Operation-Level
+  # ------------------
+  "dl-op-linalg-matmul-benchmark"
+  "dl-op-linalg-conv2d-nchw-fchw-benchmark"
+  "dl-op-linalg-conv2d-nhwc-hwcf-benchmark"
+  "dl-op-linalg-conv2d-nhwc-fhwc-benchmark"
+  "dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark"
+  "dl-op-linalg-pooling-nhwc-sum-benchmark"
+  "dl-op-linalg-batch-matmul-benchmark"
+  "dl-op-linalg-arithaddf-benchmark"
+  "dl-op-linalg-arithdivf-benchmark"
+  "dl-op-linalg-arithmulf-benchmark"
+  "dl-op-linalg-arithnegf-benchmark"
+  "dl-op-linalg-arithsubf-benchmark"
+  "dl-op-linalg-mathfpow-benchmark"
+  "dl-op-linalg-mathrsqrt-benchmark"
+  "dl-op-linalg-mathexp-benchmark"
+  "dl-op-linalg-reduceaddf-benchmark"
+  "dl-op-linalg-reducemaxf-benchmark"
+  "dl-op-linalg-softmax-exp-sum-div-benchmark"
+  "dl-op-tosa-transpose-benchmark"
+  "dl-op-matmul-transpose-b-benchmark"
+)
+
+
+################################################################################
+# 3. Set Environment Variables for Buddy MLIR/LLVM
+################################################################################
+# Adjust these paths according to your local setup:
+BUDDY_MLIR_DIR="/home/buddy-complier-workspace/buddy-mlir" # The root directory of buddy-mlir
+LLVM_BUILD_DIR="$BUDDY_MLIR_DIR/llvm/build" # The build dir for LLVM
+BUDDY_BUILD_DIR="$BUDDY_MLIR_DIR/build" # The build dir for buddy-mlir
+
+# Export environment variables:
+export BUDDY_MLIR_BUILD_DIR="$BUDDY_BUILD_DIR"
+export LLVM_MLIR_BUILD_DIR="$LLVM_BUILD_DIR"
+export PYTHONPATH="${LLVM_BUILD_DIR}/tools/mlir/python_packages/mlir_core:${BUDDY_BUILD_DIR}/python_packages:${PYTHONPATH}"
+export BENCHMARK_PATH="${BUDDY_MLIR_DIR}/../buddy-benchmark"
+echo "[Info] BUDDY_MLIR_BUILD_DIR = ${BUDDY_MLIR_BUILD_DIR}"
+echo "[Info] LLVM_MLIR_BUILD_DIR = ${LLVM_MLIR_BUILD_DIR}"
+echo "[Info] PYTHONPATH = ${PYTHONPATH}"
+
+################################################################################
+# 4. Prepare Build Folder and Run CMake
+################################################################################
+cd "${BUDDY_MLIR_DIR}/../buddy-benchmark" || exit 1
+rm -rf build
+mkdir -p build
+cd build || exit 1
+
+echo "[Info] Running CMake configuration..."
+cmake -G Ninja .. \
+  -DDEEP_LEARNING_BENCHMARKS=ON \
+  -DCMAKE_BUILD_TYPE=Release \
+  -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \
+  -DBUDDY_MLIR_BUILD_DIR="${BUDDY_MLIR_BUILD_DIR}" \
+  -DCMAKE_CXX_COMPILER="${LLVM_MLIR_BUILD_DIR}/bin/clang++" \
+  -DCMAKE_C_COMPILER="${LLVM_MLIR_BUILD_DIR}/bin/clang" \
+  -DCMAKE_CXX_FLAGS="-march=native" \
+  -DCMAKE_C_FLAGS="-march=native"
+
+
+################################################################################
+# 5. Build Each Benchmark (Continue Even If One Fails)
+################################################################################
+
+mkdir -p "${BENCHMARK_PATH}/test_result/deeplearning"
+BUILD_LOG="${BENCHMARK_PATH}/test_result/deeplearning/build_results_summary.log"
+> "${BUILD_LOG}" # Clear/create the file
+
+echo "[Info] Building all benchmarks with Ninja..."
+for target in "${BENCHMARK_TARGETS[@]}"; do
+  echo "==> ninja ${target}"
+  if ninja "${target}"; then
+    echo "[Success] Build of '${target}'" | tee -a "${BUILD_LOG}"
+  else
+    echo "[Failed] Build of '${target}'" | tee -a "${BUILD_LOG}"
+  fi
+done
+
+################################################################################
+# 6. Run Each Benchmark & Redirect Output (Continue Even If One Fails)
+################################################################################
+cd bin || exit 1
+
+RUN_LOG="${BENCHMARK_PATH}/test_result/deeplearning/run_results_summary.log"
+> "${RUN_LOG}" # clear / create the file
+
+echo "[Info] Running all benchmarks in ./bin..."
+for target in "${BENCHMARK_TARGETS[@]}"; do
+  if [[ -f "${target}" ]]; then
+    echo "==> Running ${target}"
+
+    # Write a machine-readable JSON report next to the plain console log.
+    json_out="${BENCHMARK_PATH}/test_result/deeplearning/${target}.json"
+
+    if "./${target}" \
+        --benchmark_out="${json_out}" \
+        --benchmark_out_format=json \
+        > "${BENCHMARK_PATH}/test_result/deeplearning/${target}.log" 2>&1
+    then
+      echo "[Success] Run of '${target}'" | tee -a "${RUN_LOG}"
+      echo " ↳ stdout/stderr → ${target}.log" | tee -a "${RUN_LOG}"
+      echo " ↳ gbench JSON → ${target}.json" | tee -a "${RUN_LOG}"
+    else
+      echo "[Failed] Run of '${target}'" | tee -a "${RUN_LOG}"
+      echo " ↳ stdout/stderr → ${target}.log (may contain errors)" | tee -a "${RUN_LOG}"
+    fi
+  else
+    echo "[Missing] Executable not found for '${target}'" | tee -a "${RUN_LOG}"
+  fi
+done
+
+
+################################################################################
+# 7. Additional Environment Variables for the RISC-V cross-compile
+################################################################################
+# The variables exported in section 3 are still in effect; only the
+# cross-compile specific ones are added here.
+export BUDDY_MLIR_BUILD_CROSS_DIR=${BUDDY_MLIR_BUILD_DIR}/../build
+export RISCV_GNU_TOOLCHAIN=${BUDDY_MLIR_BUILD_DIR}/../thirdparty/riscv-gnu-toolchain
+export RISCV_OMP_SHARED=${LLVM_MLIR_BUILD_DIR}/../build/lib/libomp.so
+
+echo "[Info] BUDDY_MLIR_BUILD_DIR = ${BUDDY_MLIR_BUILD_DIR}"
+echo "[Info] LLVM_MLIR_BUILD_DIR = ${LLVM_MLIR_BUILD_DIR}"
+echo "[Info] PYTHONPATH = ${PYTHONPATH}"
+
+################################################################################
+# 8. Re-run CMake for the cross-compile
+################################################################################
+cd "${BUDDY_MLIR_DIR}/../buddy-benchmark" || exit 1
+mkdir -p build
+cd build || exit 1
+
+echo "[Info] Running CMake configuration..."
+cmake -G Ninja .. \
+  -DDEEP_LEARNING_BENCHMARKS=ON \
+  -DCMAKE_BUILD_TYPE=RELEASE \
+  -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \
+  -DCROSS_COMPILE_RVV=ON \
+  -DCMAKE_SYSTEM_NAME=Linux \
+  -DCMAKE_SYSTEM_PROCESSOR=riscv \
+  -DCMAKE_C_COMPILER=${LLVM_MLIR_BUILD_DIR}/bin/clang \
+  -DRISCV_GNU_TOOLCHAIN=${RISCV_GNU_TOOLCHAIN} \
+  -DCMAKE_CXX_COMPILER=${LLVM_MLIR_BUILD_DIR}/bin/clang++ \
+  -DCMAKE_C_FLAGS="-march=rv64gcv --target=riscv64-unknown-linux-gnu --sysroot=${RISCV_GNU_TOOLCHAIN}/sysroot --gcc-toolchain=${RISCV_GNU_TOOLCHAIN} -fPIC" \
+  -DCMAKE_CXX_FLAGS="-march=rv64gcv --target=riscv64-unknown-linux-gnu --sysroot=${RISCV_GNU_TOOLCHAIN}/sysroot --gcc-toolchain=${RISCV_GNU_TOOLCHAIN} -fPIC" \
+  -DRISCV_OMP_SHARED=${RISCV_OMP_SHARED} \
+  -DBUDDY_MLIR_BUILD_DIR=${BUDDY_MLIR_BUILD_DIR} \
+  -DBUDDY_MLIR_BUILD_CROSS_DIR=${BUDDY_MLIR_BUILD_CROSS_DIR} \
+  -DBUDDY_MLIR_CROSS_LIB_DIR=${BUDDY_MLIR_BUILD_CROSS_DIR}/lib
+
+################################################################################
+# 9. Build Each Benchmark for the cross-compile (Continue Even If One Fails)
+################################################################################
+
+mkdir -p "${BENCHMARK_PATH}/test_result/deeplearning"
+# Kept in its own variable so the native build summary path is not lost.
+CROSS_BUILD_LOG="${BENCHMARK_PATH}/test_result/deeplearning/build_results_crosscompile_summary.log"
+> "${CROSS_BUILD_LOG}" # Clear/create the file
+
+echo "[Info] Building all benchmarks with Ninja..."
+for target in "${BENCHMARK_TARGETS[@]}"; do
+  echo "==> ninja ${target}"
+  if ninja "${target}"; then
+    echo "[Success] Build of '${target}'" | tee -a "${CROSS_BUILD_LOG}"
+  else
+    echo "[Failed] Build of '${target}'" | tee -a "${CROSS_BUILD_LOG}"
+  fi
+done
+
+
+echo
+echo "[Info] All build/run steps completed (script did not stop on failures)."
+echo "[Info] Build summary: ${BUILD_LOG}"
+echo "[Info] Cross-compile build summary: ${CROSS_BUILD_LOG}"
+echo "[Info] Run summary: ${RUN_LOG}"
\ No newline at end of file
diff --git a/test/test_script_geminiprocessing.sh b/test/test_script_geminiprocessing.sh
new file mode 100755
index 00000000..b151cb5b
--- /dev/null
+++ b/test/test_script_geminiprocessing.sh
@@ -0,0 +1,103 @@
+#!/usr/bin/env bash
+# Configures and builds the Gemmini benchmarks using the RISC-V compilers
+# found on PATH after the Chipyard environment is set up, and stores the
+# configure/build output under test_result/geminiprocessing.
+
+export BUDDY_MLIR_BUILD_DIR=/home/buddy-complier-workspace/buddy-mlir/build
+export LLVM_MLIR_BUILD_DIR=/home/buddy-complier-workspace/buddy-mlir/llvm/build
+export CHIPYARD_DIR=/home/buddy-complier-workspace/chipyard
+export BUDDY_BENCHMARK_DIR=/home/buddy-complier-workspace/buddy-benchmark
+
+cd "${CHIPYARD_DIR}"
+git config --global --add safe.directory /home/buddy-complier-workspace/chipyard
+git checkout 1.8.1
+
+# Initialize and update the 'generators/gemmini' submodule and any submodules inside it.
+git config --global --add safe.directory /home/buddy-complier-workspace/chipyard/generators/gemmini
+git submodule update --init --recursive generators/gemmini
+
+#############################################
+# 1. Initialize Conda for the current shell
+#############################################
+# "conda init" only edits shell start-up files, which a non-interactive script
+# never reads; load the shell hook directly so "conda activate" works below.
+eval "$(conda shell.bash hook)"
+
+#############################################
+# 2. Check if 'chipyard' environment exists
+#############################################
+# The environment name is the first column of "conda env list".
+if conda env list | grep -qE '^chipyard[[:space:]]'; then
+  echo "[INFO] Found existing 'chipyard' environment. Activating it."
+else
+  echo "[INFO] 'chipyard' environment not found. Creating it..."
+  # Example creation command - adjust packages as needed
+  # (plus any other dependencies needed...)
+  conda create -y -n chipyard python=3.10 \
+    cmake ninja
+fi
+
+conda activate chipyard
+
+#############################################
+# 3. Source build-setup and env.sh
+#############################################
+# If your script uses conda-lock or has pinned requirements,
+# you might need to call build-setup.sh so it *creates* the
+# .conda-env environment. But be sure it doesn’t conflict
+# with your newly created 'chipyard' environment.
+source build-setup.sh esp-tools
+source env.sh
+
+#############################################
+# 4. Proceed with your build
+#############################################
+cd "${BUDDY_BENCHMARK_DIR}"
+# Remove any existing build directory and create a fresh one.
+rm -rf build
+mkdir -p build && cd build
+
+RESULT_DIR="${BUDDY_BENCHMARK_DIR}/test_result/geminiprocessing"
+mkdir -p "${RESULT_DIR}"
+
+export C_PATH=$(which riscv64-unknown-linux-gnu-gcc)
+export CXX_PATH=$(which riscv64-unknown-linux-gnu-g++)
+export CLinker_PATH=$(which riscv64-unknown-linux-gnu-ld)
+
+# Print Address here
+echo "[Info] C_COMPILER_PATH = ${C_PATH}"
+echo "[Info] CXX_COMPILER_PATH = ${CXX_PATH}"
+echo "[Info] C_LINKER_PATH = ${CLinker_PATH}"
+echo "[Info] BUDDY_MLIR_BUILD_DIR = ${BUDDY_MLIR_BUILD_DIR}"
+echo "[Info] LLVM_MLIR_BUILD_DIR = ${LLVM_MLIR_BUILD_DIR}"
+echo "[Info] CHIPYARD_DIR = ${CHIPYARD_DIR}"
+echo "[Info] BUDDY_BENCHMARK_DIR = ${BUDDY_BENCHMARK_DIR}"
+echo "[Info] RESULT_DIR = ${RESULT_DIR}"
+
+echo "[Info] Running CMake configuration..."
+cmake -G Ninja .. \
+  -DCMAKE_C_COMPILER=${C_PATH} \
+  -DCMAKE_CXX_COMPILER=${CXX_PATH} \
+  -DCMAKE_LINKER=${CLinker_PATH} \
+  -DCMAKE_BUILD_TYPE=RELEASE \
+  -DBUDDY_MLIR_BUILD_DIR=${BUDDY_MLIR_BUILD_DIR} \
+  -DGEMMINI_INCLUDE_DIR=${CHIPYARD_DIR}/generators/gemmini/software/gemmini-rocc-tests/include/ \
+  -DGEMMINI_BENCHMARKS=ON \
+  2>&1 | tee "${RESULT_DIR}/cmake_configure.log"
+
+ninja 2>&1 | tee "${RESULT_DIR}/build.log"
+
+# ```[1/21] Creating directories for 'project_googlebenchmark'
+# [2/21] Building C object benchmarks/Gemmini/Ops/MatMulOp/CMakeFiles/ExoMatMul.dir/ExoMatmul.c.o
+# FAILED: benchmarks/Gemmini/Ops/MatMulOp/CMakeFiles/ExoMatMul.dir/ExoMatmul.c.o
+# riscv64-unknown-linux-gnu-gcc -I/home/buddy-complier-workspace/buddy-mlir/build/cmake/../../frontend/Interfaces -I/home/buddy-complier-workspace/buddy-mlir/build/cmake/../../thirdparty/include -I/home/buddy-complier-workspace/buddy-benchmark/benchmarks -I/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include -I/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/.. -I/home/xychen/buddy-mlir/frontend/Interfaces -O3 -DNDEBUG -MD -MT benchmarks/Gemmini/Ops/MatMulOp/CMakeFiles/ExoMatMul.dir/ExoMatmul.c.o -MF benchmarks/Gemmini/Ops/MatMulOp/CMakeFiles/ExoMatMul.dir/ExoMatmul.c.o.d -o benchmarks/Gemmini/Ops/MatMulOp/CMakeFiles/ExoMatMul.dir/ExoMatmul.c.o -c /home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c
+# /home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c: In function '_exo_matmul_4':
+# /home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:28:47: error: macro "gemmini_extended_config_ex" requires 7 arguments, but only 6 given
+# 28 | gemmini_extended_config_ex(WS, 0, 0, 1, 0, 0);
+# | ^
+# In file included from /home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:23:```
+
+# cd bin
+# ./vectorization-matrix-benchmark 2>&1 | tee "${RESULT_DIR}/run.log"
+
+echo "[Info] CMake and build logs are stored in ${RESULT_DIR}"
diff --git a/test/test_script_imageprocessing.sh b/test/test_script_imageprocessing.sh
new file mode 100755
index 00000000..0bdf9fa1
--- /dev/null
+++ b/test/test_script_imageprocessing.sh
@@ -0,0 +1,79 @@
+#!/usr/bin/env bash
+# Builds image-processing-benchmark once for every SIMD feature that the host
+# CPU reports and runs it for each image/kernel/boundary combination. Every
+# run writes a JSON report and a console log to test_result/imageprocessing.
+# Build and result paths are relative to the current directory, which is
+# expected to be the buddy-benchmark source root (cmake is pointed at "..").
+
+# Create the results directory and start a fresh summary log.
+RESULT_DIR="${PWD}/test_result/imageprocessing"
+mkdir -p "$RESULT_DIR"
+LOG="${RESULT_DIR}/image-processing-result.log"
+echo "Benchmark results - $(date)" > "$LOG"
+
+# Succeeds when /proc/cpuinfo mentions the given feature (case-insensitive).
+supports() {
+  local flag
+  flag=$(echo "$1" | tr '[:upper:]' '[:lower:]')
+  grep -qi -- "$flag" /proc/cpuinfo
+}
+
+features=("SSE" "AVX2" "AVX512" "NEON")
+images=("../benchmarks/ImageProcessing/Images/YuTu.png")
+kernels=("prewittKernelAlign" "sobel3x3KernelAlign" "sobel5x5KernelAlign" "sobel7x7KernelAlign" "sobel9x9KernelAlign" "laplacianKernelAlign" "logKernelAlign")
+kernelmorphs=("random3x3KernelAlignInt")
+boundaries=("CONSTANT_PADDING" "REPLICATE_PADDING")
+
+for feature in "${features[@]}"; do
+  echo "Testing $feature support" | tee -a "$LOG"
+  if supports "$feature"; then
+    echo "$feature is supported." | tee -a "$LOG"
+    mkdir -p build_${feature} && cd build_${feature}
+    cmake -G Ninja .. \
+      -DCMAKE_BUILD_TYPE=RELEASE \
+      -DIMAGE_PROCESSING_BENCHMARKS=ON \
+      -DOpenCV_DIR=$PWD/../thirdparty/opencv/build/ \
+      -DEIGEN_DIR=$PWD/../thirdparty/eigen/ \
+      -DBUDDY_OPT_ATTR=$(echo "$feature" | tr '[:upper:]' '[:lower:]') \
+      -DBUDDY_MLIR_BUILD_DIR=/home/buddy-complier-workspace/buddy-mlir/build
+    ninja image-processing-benchmark
+    echo "Running image-processing-benchmark for $feature" | tee -a "$LOG"
+    for img in "${images[@]}"; do
+      img_slug=$(basename "$img" .png) # e.g. YuTu.png → YuTu
+      for kern in "${kernels[@]}"; do
+        for morph in "${kernelmorphs[@]}"; do
+          for boundary in "${boundaries[@]}"; do
+            echo "Running: $img $kern $morph $boundary" | tee -a "$LOG"
+
+            # Unique per-run file stem shared by the JSON report and the log.
+            slug="$(echo "${feature}_${img_slug}_${kern}_${morph}_${boundary}" \
+              | tr ' /' '__')"
+            json_out="${RESULT_DIR}/${slug}.json"
+            log_out="${RESULT_DIR}/${slug}.log"
+
+            # Record the real outcome of the run instead of an unconditional
+            # success marker.
+            if ./bin/image-processing-benchmark \
+                "$img" "$kern" "$morph" "$boundary" \
+                --benchmark_out="$json_out" \
+                --benchmark_out_format=json \
+                > "$log_out" 2>&1
+            then
+              echo "[Success] ${slug}" | tee -a "$LOG"
+            else
+              echo "[Failed] ${slug} (see ${slug}.log)" | tee -a "$LOG"
+            fi
+          done
+        done
+      done
+    done
+    cd ..
+  else
+    echo "CPU does not support $feature." | tee -a "$LOG"
+  fi
+done
+
+# Clean up the per-feature build directories.
+for feature in "${features[@]}"; do
+  rm -rf "build_${feature}"
+done
\ No newline at end of file
diff --git a/test/test_script_vectorizationprocessing.sh b/test/test_script_vectorizationprocessing.sh
new file mode 100755
index 00000000..6bf34fbc
--- /dev/null
+++ b/test/test_script_vectorizationprocessing.sh
@@ -0,0 +1,76 @@
+#!/usr/bin/env bash
+
+# apt update
+# apt install -y libc6-riscv64-cross
+# apt install -y \
+# libc6-riscv64-cross \
+# libstdc++6-riscv64-cross \
+# libgcc-s1-riscv64-cross
+################################################################################
+# 1. Script Setup
+################################################################################
+# pipefail is needed because the build steps are piped through tee: without
+# it "set -e" only sees tee's exit status and a failed cmake/ninja would go
+# unnoticed.
+set -eo pipefail
+BUDDY_MLIR_DIR="/home/buddy-complier-workspace/buddy-mlir"
+BUDDY_MLIR_BUILD_DIR="${BUDDY_MLIR_DIR}/build"
+LLVM_MLIR_BUILD_DIR="${BUDDY_MLIR_DIR}/llvm/build"
+# Export environment variables:
+export PYTHONPATH="${LLVM_MLIR_BUILD_DIR}/tools/mlir/python_packages/mlir_core:${BUDDY_MLIR_BUILD_DIR}/python_packages:${PYTHONPATH:-}"
+BUDDY_MLIR_BUILD_CROSS_DIR=${BUDDY_MLIR_BUILD_DIR}/../build
+RISCV_GNU_TOOLCHAIN=${BUDDY_MLIR_BUILD_DIR}/../thirdparty/riscv-gnu-toolchain
+RISCV_OMP_SHARED=${LLVM_MLIR_BUILD_DIR}/../build/lib/libomp.so
+BENCHMARK_PATH="/home/buddy-complier-workspace/buddy-benchmark"
+
+echo "[Info] BUDDY_MLIR_BUILD_DIR = ${BUDDY_MLIR_BUILD_DIR}"
+echo "[Info] LLVM_MLIR_BUILD_DIR = ${LLVM_MLIR_BUILD_DIR}"
+
+# Results go under the benchmark checkout (not the caller's working
+# directory) so they end up in the same test_result tree regardless of where
+# the script is started from.
+RESULT_DIR="${BENCHMARK_PATH}/test_result/vectorization"
+mkdir -p "${RESULT_DIR}"
+LOG_FILE="${RESULT_DIR}/vectorization_result.log"
+echo "Vectorization Benchmark - $(date)" > "${LOG_FILE}"
+
+################################################################################
+# 2. Build Benchmark
+################################################################################
+cd "${BENCHMARK_PATH}"
+echo "[Info] Starting vectorization-matrix-benchmark build..." | tee -a "${LOG_FILE}"
+rm -rf build
+mkdir -p build && cd build
+echo "[Info] Running CMake configuration..." | tee -a "${LOG_FILE}"
+cmake -G Ninja .. \
+  -DCMAKE_BUILD_TYPE=RELEASE \
+  -DVECTORIZATION_BENCHMARKS=ON \
+  -DBUDDY_MLIR_BUILD_DIR="${BUDDY_MLIR_BUILD_DIR}" 2>&1 | tee -a "${LOG_FILE}"
+
+echo "[Info] Building vectorization-matrix-benchmark..." | tee -a "${LOG_FILE}"
+ninja vectorization-matrix-benchmark 2>&1 | tee -a "${LOG_FILE}"
+
+export QEMU_LD_PREFIX=/usr/riscv64-linux-gnu
+################################################################################
+# 3. Run Benchmark
+################################################################################
+cd bin
+echo "[Info] Running vectorization-matrix-benchmark..." | tee -a "${LOG_FILE}"
+json_out="${RESULT_DIR}/vectorization_matrix.json"
+log_out="${RESULT_DIR}/vectorization_matrix.log"
+
+# Capture the exit status instead of letting "set -e" abort here, so the
+# benchmark output is still appended to the summary log when the run fails.
+run_status=0
+./vectorization-matrix-benchmark \
+  --benchmark_out="$json_out" \
+  --benchmark_out_format=json \
+  > "$log_out" 2>&1 || run_status=$?
+tee -a "$LOG_FILE" < "$log_out"
+
+if [ "${run_status}" -ne 0 ]; then
+  echo "[Error] vectorization-matrix-benchmark exited with status ${run_status}" | tee -a "${LOG_FILE}"
+  exit "${run_status}"
+fi
+
+echo "[Info] Benchmark completed. Log saved to ${LOG_FILE}"
\ No newline at end of file
diff --git a/test_result/deeplearning/build_results_crosscompile_summary.log b/test_result/deeplearning/build_results_crosscompile_summary.log
new file mode 100644
index 00000000..ddd8a7cc
--- /dev/null
+++ b/test_result/deeplearning/build_results_crosscompile_summary.log
@@ -0,0 +1,54 @@
+[Failed] Build of 'dl-model-tinyllama-benchmark'
+[Failed] Build of 'dl-model-mobilenetv3-benchmark'
+[Success] Build of 'dl-model-lenet-benchmark'
+[Failed] Build of 'dl-model-bert-benchmark'
+[Failed] Build of 'dl-model-whisper-benchmark'
+[Failed] Build of 'dl-model-resnet18-benchmark'
+[Success] Build of 'dl-layer-ffn-benchmark'
+[Success] Build of 'dl-layer-selfattention-benchmark'
+[Success] Build of 'dl-layer-rmsnorm-benchmark'
+[Failed] Build of 'dl-op-linalg-matmul-benchmark'
+[Success] Build of 'dl-op-linalg-conv2d-nchw-fchw-benchmark'
+[Success] Build of 'dl-op-linalg-conv2d-nhwc-hwcf-benchmark'
+[Success] Build of 'dl-op-linalg-conv2d-nhwc-fhwc-benchmark'
+[Success] Build of 'dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark'
+[Success] Build of 'dl-op-linalg-pooling-nhwc-sum-benchmark'
+[Failed] Build of 'dl-op-linalg-batch-matmul-benchmark'
+[Success] Build of 'dl-op-linalg-arithaddf-benchmark'
+[Success] Build of 'dl-op-linalg-arithdivf-benchmark'
+[Success] Build of 'dl-op-linalg-arithmulf-benchmark'
+[Success] Build of 'dl-op-linalg-arithnegf-benchmark'
+[Success] Build of 'dl-op-linalg-arithsubf-benchmark'
+[Success] Build of 'dl-op-linalg-mathfpow-benchmark'
+[Success] Build of 'dl-op-linalg-mathrsqrt-benchmark'
+[Success] Build of 'dl-op-linalg-mathexp-benchmark'
+[Success] Build of 'dl-op-linalg-reduceaddf-benchmark'
+[Success] Build of 'dl-op-linalg-reducemaxf-benchmark'
+[Success] Build of 'dl-op-linalg-softmax-exp-sum-div-benchmark'
+[Failed] Build of 'dl-op-tosa-transpose-benchmark'
+[Failed] Build of 'dl-op-matmul-transpose-b-benchmark'
+[Failed] Build of 'dl-model-whisper-benchmark'
+[Failed] Build of 'dl-model-resnet18-benchmark'
+[Success] Build of 'dl-layer-ffn-benchmark'
+[Success] Build of 'dl-layer-selfattention-benchmark'
+[Success] Build of 'dl-layer-rmsnorm-benchmark'
+[Failed] Build of 'dl-op-linalg-matmul-benchmark'
+[Success] Build of 'dl-op-linalg-conv2d-nchw-fchw-benchmark'
+[Success] Build of 'dl-op-linalg-conv2d-nhwc-hwcf-benchmark'
+[Success] Build of 'dl-op-linalg-conv2d-nhwc-fhwc-benchmark'
+[Success] Build of 'dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark'
+[Success] Build of 'dl-op-linalg-pooling-nhwc-sum-benchmark'
+[Failed] Build of 'dl-op-linalg-batch-matmul-benchmark'
+[Success] Build of 'dl-op-linalg-arithaddf-benchmark'
+[Success] Build of 'dl-op-linalg-arithdivf-benchmark'
+[Success] Build of 'dl-op-linalg-arithmulf-benchmark'
+[Success] Build of 'dl-op-linalg-arithnegf-benchmark'
+[Success] Build of 'dl-op-linalg-arithsubf-benchmark'
+[Success] Build of 'dl-op-linalg-mathfpow-benchmark'
+[Success] Build of 'dl-op-linalg-mathrsqrt-benchmark'
+[Success] Build of 'dl-op-linalg-mathexp-benchmark'
+[Success] Build of 'dl-op-linalg-reduceaddf-benchmark'
+[Success] Build of 'dl-op-linalg-reducemaxf-benchmark'
+[Success] Build of 'dl-op-linalg-softmax-exp-sum-div-benchmark'
+[Failed] Build of 'dl-op-tosa-transpose-benchmark'
+[Failed] Build of 'dl-op-matmul-transpose-b-benchmark'
diff --git a/test_result/deeplearning/build_results_summary.log b/test_result/deeplearning/build_results_summary.log
new file mode 100644
index 00000000..0f7a7c2e
--- /dev/null
+++ b/test_result/deeplearning/build_results_summary.log
@@ -0,0 +1,57 @@
+[Failed] Build of 'dl-model-mobilenetv3-benchmark'
+[Failed] Build of 'dl-model-lenet-benchmark'
+[Failed] Build of 'dl-model-bert-benchmark'
+[Failed] Build of 'dl-model-whisper-benchmark'
+[Failed] Build of 'dl-model-resnet18-benchmark'
+[Failed] Build of 'dl-layer-ffn-benchmark'
+[Failed] Build of 'dl-layer-selfattention-benchmark'
+[Failed] Build of 'dl-layer-rmsnorm-benchmark'
+[Failed] Build of 'dl-op-linalg-matmul-benchmark'
+[Failed] Build of 'dl-op-linalg-conv2d-nchw-fchw-benchmark'
+[Failed] Build of 'dl-op-linalg-conv2d-nhwc-hwcf-benchmark'
+[Failed] Build of 'dl-op-linalg-conv2d-nhwc-fhwc-benchmark'
+[Failed] Build of 'dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark'
+[Failed] Build of 'dl-op-linalg-pooling-nhwc-sum-benchmark'
+[Failed] Build of 'dl-op-linalg-batch-matmul-benchmark'
+[Failed] Build of 'dl-op-linalg-arithaddf-benchmark'
+[Failed] Build of 'dl-op-linalg-arithdivf-benchmark'
+[Failed] Build of 'dl-op-linalg-arithmulf-benchmark'
+[Failed] Build of 'dl-op-linalg-arithnegf-benchmark'
+[Failed] Build of 'dl-op-linalg-arithsubf-benchmark'
+[Failed] Build of 'dl-op-linalg-mathfpow-benchmark'
+[Failed] Build of 'dl-op-linalg-mathrsqrt-benchmark'
+[Failed] Build of 'dl-op-linalg-mathexp-benchmark'
+[Failed] Build of 'dl-op-linalg-reduceaddf-benchmark'
+[Failed] Build of 'dl-op-linalg-reducemaxf-benchmark'
+[Failed] Build of 'dl-op-linalg-softmax-exp-sum-div-benchmark'
+[Failed] Build of 'dl-op-tosa-transpose-benchmark'
+[Failed] Build of 'dl-op-matmul-transpose-b-benchmark'
+[Failed] Build of 'dl-model-tinyllama-benchmark'
+[Failed] Build of 'dl-model-mobilenetv3-benchmark'
+[Success] Build of 'dl-model-lenet-benchmark'
+[Failed] Build of 'dl-model-bert-benchmark'
+[Failed] Build of 'dl-model-whisper-benchmark'
+[Failed] Build of 'dl-model-resnet18-benchmark'
+[Success] Build of 'dl-layer-ffn-benchmark'
+[Success] Build of 'dl-layer-selfattention-benchmark'
+[Success] Build of 'dl-layer-rmsnorm-benchmark'
+[Failed] Build of 'dl-op-linalg-matmul-benchmark'
+[Success] Build of 'dl-op-linalg-conv2d-nchw-fchw-benchmark'
+[Success] Build of 'dl-op-linalg-conv2d-nhwc-hwcf-benchmark'
+[Success] Build of 'dl-op-linalg-conv2d-nhwc-fhwc-benchmark'
+[Success] Build of 'dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark'
+[Success] Build of 'dl-op-linalg-pooling-nhwc-sum-benchmark'
+[Failed] Build of 'dl-op-linalg-batch-matmul-benchmark'
+[Success] Build of 'dl-op-linalg-arithaddf-benchmark'
+[Success] Build of 'dl-op-linalg-arithdivf-benchmark'
+[Success] Build of 'dl-op-linalg-arithmulf-benchmark'
+[Success] Build of 'dl-op-linalg-arithnegf-benchmark'
+[Success] Build of 'dl-op-linalg-arithsubf-benchmark'
+[Success] Build of 'dl-op-linalg-mathfpow-benchmark'
+[Success] Build of 'dl-op-linalg-mathrsqrt-benchmark'
+[Success] Build of 'dl-op-linalg-mathexp-benchmark'
+[Success] Build of 'dl-op-linalg-reduceaddf-benchmark'
+[Success] Build of 'dl-op-linalg-reducemaxf-benchmark'
+[Success] Build of 'dl-op-linalg-softmax-exp-sum-div-benchmark'
+[Failed] Build of 'dl-op-tosa-transpose-benchmark'
+[Failed] Build of 'dl-op-matmul-transpose-b-benchmark'
diff --git a/test_result/deeplearning/dl-layer-ffn-benchmark.json b/test_result/deeplearning/dl-layer-ffn-benchmark.json
new file mode 100644
index 00000000..fdea2004
--- /dev/null
+++ b/test_result/deeplearning/dl-layer-ffn-benchmark.json
@@ -0,0 +1,68 @@
+{
+ "context": {
+ "date": "2025-09-07T12:45:30+00:00",
+ "host_name": "4ed4bacfe45d",
+ "executable": "./dl-layer-ffn-benchmark",
+ "num_cpus": 24,
+ "mhz_per_cpu": 5100,
+ "cpu_scaling_enabled": true,
+ "caches": [
+ {
+ "type": "Data",
+ "level": 1,
+ "size": 49152,
+ "num_sharing": 2
+ },
+ {
+ "type": "Instruction",
+ "level": 1,
+ "size": 32768,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 2,
+ "size": 1310720,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 3,
+ "size": 31457280,
+ "num_sharing": 24
+ }
+ ],
+ "load_avg": [2.51807,3.40967,5.1626],
+ "library_build_type": "release"
+ },
+ "benchmarks": [
+ {
+ "name": "DL_LAYER_FFN/Scalar",
+ "family_index": 0,
+ "per_family_instance_index": 0,
+ "run_name": "DL_LAYER_FFN/Scalar",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 10218,
+ "real_time": 6.7533425334895703e-02,
+ "cpu_time": 6.7531935701702864e-02,
+ "time_unit": "ms"
+ },
+ {
+ "name": "DL_LAYER_FFN/Auto_Vectorization",
+ "family_index": 1,
+ "per_family_instance_index": 0,
+ "run_name": "DL_LAYER_FFN/Auto_Vectorization",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 26193,
+ "real_time": 2.6626899614870417e-02,
+ "cpu_time": 2.6626213683045089e-02,
+ "time_unit": "ms"
+ }
+ ]
+}
diff --git a/test_result/deeplearning/dl-layer-ffn-benchmark.log b/test_result/deeplearning/dl-layer-ffn-benchmark.log
new file mode 100644
index 00000000..e69de29b
diff --git a/test_result/deeplearning/dl-layer-rmsnorm-benchmark.json b/test_result/deeplearning/dl-layer-rmsnorm-benchmark.json
new file mode 100644
index 00000000..e7f27984
--- /dev/null
+++ b/test_result/deeplearning/dl-layer-rmsnorm-benchmark.json
@@ -0,0 +1,68 @@
+{
+ "context": {
+ "date": "2025-09-07T12:45:34+00:00",
+ "host_name": "4ed4bacfe45d",
+ "executable": "./dl-layer-rmsnorm-benchmark",
+ "num_cpus": 24,
+ "mhz_per_cpu": 5100,
+ "cpu_scaling_enabled": true,
+ "caches": [
+ {
+ "type": "Data",
+ "level": 1,
+ "size": 49152,
+ "num_sharing": 2
+ },
+ {
+ "type": "Instruction",
+ "level": 1,
+ "size": 32768,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 2,
+ "size": 1310720,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 3,
+ "size": 31457280,
+ "num_sharing": 24
+ }
+ ],
+ "load_avg": [2.47656,3.38623,5.14551],
+ "library_build_type": "release"
+ },
+ "benchmarks": [
+ {
+ "name": "DL_LAYER_RMSNORM/Scalar",
+ "family_index": 0,
+ "per_family_instance_index": 0,
+ "run_name": "DL_LAYER_RMSNORM/Scalar",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 339474,
+ "real_time": 1.9830409605425532e-03,
+ "cpu_time": 1.9829382397473739e-03,
+ "time_unit": "ms"
+ },
+ {
+ "name": "DL_LAYER_RMSNORM/Auto_Vectorization",
+ "family_index": 1,
+ "per_family_instance_index": 0,
+ "run_name": "DL_LAYER_RMSNORM/Auto_Vectorization",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 780156,
+ "real_time": 8.9165813354251345e-04,
+ "cpu_time": 8.9162349196827311e-04,
+ "time_unit": "ms"
+ }
+ ]
+}
diff --git a/test_result/deeplearning/dl-layer-rmsnorm-benchmark.log b/test_result/deeplearning/dl-layer-rmsnorm-benchmark.log
new file mode 100644
index 00000000..e69de29b
diff --git a/test_result/deeplearning/dl-layer-selfattention-benchmark.json b/test_result/deeplearning/dl-layer-selfattention-benchmark.json
new file mode 100644
index 00000000..f66a0d7e
--- /dev/null
+++ b/test_result/deeplearning/dl-layer-selfattention-benchmark.json
@@ -0,0 +1,68 @@
+{
+ "context": {
+ "date": "2025-09-07T12:45:32+00:00",
+ "host_name": "4ed4bacfe45d",
+ "executable": "./dl-layer-selfattention-benchmark",
+ "num_cpus": 24,
+ "mhz_per_cpu": 5100,
+ "cpu_scaling_enabled": true,
+ "caches": [
+ {
+ "type": "Data",
+ "level": 1,
+ "size": 49152,
+ "num_sharing": 2
+ },
+ {
+ "type": "Instruction",
+ "level": 1,
+ "size": 32768,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 2,
+ "size": 1310720,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 3,
+ "size": 31457280,
+ "num_sharing": 24
+ }
+ ],
+ "load_avg": [2.51807,3.40967,5.1626],
+ "library_build_type": "release"
+ },
+ "benchmarks": [
+ {
+ "name": "DL_LAYER_ATTENTION/Scalar",
+ "family_index": 0,
+ "per_family_instance_index": 0,
+ "run_name": "DL_LAYER_ATTENTION/Scalar",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 144,
+ "real_time": 4.8677878868248730e+00,
+ "cpu_time": 4.8676234444444439e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "DL_LAYER_ATTENTION/Auto_Vectorization",
+ "family_index": 1,
+ "per_family_instance_index": 0,
+ "run_name": "DL_LAYER_ATTENTION/Auto_Vectorization",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 435,
+ "real_time": 1.5936243722493622e+00,
+ "cpu_time": 1.5935723448275860e+00,
+ "time_unit": "ms"
+ }
+ ]
+}
diff --git a/test_result/deeplearning/dl-layer-selfattention-benchmark.log b/test_result/deeplearning/dl-layer-selfattention-benchmark.log
new file mode 100644
index 00000000..e69de29b
diff --git a/test_result/deeplearning/dl-model-lenet-benchmark.json b/test_result/deeplearning/dl-model-lenet-benchmark.json
new file mode 100644
index 00000000..f50ed8e9
--- /dev/null
+++ b/test_result/deeplearning/dl-model-lenet-benchmark.json
@@ -0,0 +1,68 @@
+{
+ "context": {
+ "date": "2025-09-07T12:41:48+00:00",
+ "host_name": "4ed4bacfe45d",
+ "executable": "./dl-model-lenet-benchmark",
+ "num_cpus": 24,
+ "mhz_per_cpu": 5100,
+ "cpu_scaling_enabled": true,
+ "caches": [
+ {
+ "type": "Data",
+ "level": 1,
+ "size": 49152,
+ "num_sharing": 2
+ },
+ {
+ "type": "Instruction",
+ "level": 1,
+ "size": 32768,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 2,
+ "size": 1310720,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 3,
+ "size": 31457280,
+ "num_sharing": 24
+ }
+ ],
+ "load_avg": [3.67334,4.12793,5.80713],
+ "library_build_type": "release"
+ },
+ "benchmarks": [
+ {
+ "name": "DL_MODEL_LENET/Auto_Vectorization",
+ "family_index": 0,
+ "per_family_instance_index": 0,
+ "run_name": "DL_MODEL_LENET/Auto_Vectorization",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4111,
+ "real_time": 1.7333792885473193e-01,
+ "cpu_time": 1.7333462247628315e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "DL_MODEL_LENET/Buddy_Vectorization",
+ "family_index": 1,
+ "per_family_instance_index": 0,
+ "run_name": "DL_MODEL_LENET/Buddy_Vectorization",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4846,
+ "real_time": 1.4355380335623599e-01,
+ "cpu_time": 1.4355146595130003e-01,
+ "time_unit": "ms"
+ }
+ ]
+}
diff --git a/test_result/deeplearning/dl-model-lenet-benchmark.log b/test_result/deeplearning/dl-model-lenet-benchmark.log
new file mode 100644
index 00000000..e69de29b
diff --git a/test_result/deeplearning/dl-model-mobilenetv3-benchmark.json b/test_result/deeplearning/dl-model-mobilenetv3-benchmark.json
new file mode 100644
index 00000000..dd135dd9
--- /dev/null
+++ b/test_result/deeplearning/dl-model-mobilenetv3-benchmark.json
@@ -0,0 +1,68 @@
+{
+ "context": {
+ "date": "2025-09-07T12:41:45+00:00",
+ "host_name": "4ed4bacfe45d",
+ "executable": "./dl-model-mobilenetv3-benchmark",
+ "num_cpus": 24,
+ "mhz_per_cpu": 5100,
+ "cpu_scaling_enabled": true,
+ "caches": [
+ {
+ "type": "Data",
+ "level": 1,
+ "size": 49152,
+ "num_sharing": 2
+ },
+ {
+ "type": "Instruction",
+ "level": 1,
+ "size": 32768,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 2,
+ "size": 1310720,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 3,
+ "size": 31457280,
+ "num_sharing": 24
+ }
+ ],
+ "load_avg": [3.67334,4.12793,5.80713],
+ "library_build_type": "release"
+ },
+ "benchmarks": [
+ {
+ "name": "BM_MobileNet_V3/BM_MobileNet_V3_scalar",
+ "family_index": 0,
+ "per_family_instance_index": 0,
+ "run_name": "BM_MobileNet_V3/BM_MobileNet_V3_scalar",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 17,
+ "real_time": 3.9183362222769681e+01,
+ "cpu_time": 3.9182252941176472e+01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "BM_MobileNet_V3/BM_MobileNet_V3_conv_opt",
+ "family_index": 1,
+ "per_family_instance_index": 0,
+ "run_name": "BM_MobileNet_V3/BM_MobileNet_V3_conv_opt",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 20,
+ "real_time": 3.4668323397636414e+01,
+ "cpu_time": 3.4667267849999995e+01,
+ "time_unit": "ms"
+ }
+ ]
+}
diff --git a/test_result/deeplearning/dl-model-mobilenetv3-benchmark.log b/test_result/deeplearning/dl-model-mobilenetv3-benchmark.log
new file mode 100644
index 00000000..9a53be36
--- /dev/null
+++ b/test_result/deeplearning/dl-model-mobilenetv3-benchmark.log
@@ -0,0 +1,19 @@
+2025-09-07T12:41:45+00:00
+Running ./dl-model-mobilenetv3-benchmark
+Run on (24 X 5100 MHz CPU s)
+CPU Caches:
+ L1 Data 48 KiB (x12)
+ L1 Instruction 32 KiB (x12)
+ L2 Unified 1280 KiB (x12)
+ L3 Unified 30720 KiB (x1)
+Load Average: 3.67, 4.13, 5.81
+***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
+-----------------------------------------------------------------------------------
+Benchmark Time CPU Iterations
+-----------------------------------------------------------------------------------
+BM_MobileNet_V3/BM_MobileNet_V3_scalar 39.2 ms 39.2 ms 17
+BM_MobileNet_V3/BM_MobileNet_V3_conv_opt 34.7 ms 34.7 ms 20
+-----------------------------------------------------------
+Correctness Verification:
+Transform case: [32mPASS[0m
+-----------------------------------------------------------
diff --git a/test_result/deeplearning/dl-model-resnet18-benchmark.json b/test_result/deeplearning/dl-model-resnet18-benchmark.json
new file mode 100644
index 00000000..1628c1ef
--- /dev/null
+++ b/test_result/deeplearning/dl-model-resnet18-benchmark.json
@@ -0,0 +1,68 @@
+{
+ "context": {
+ "date": "2025-09-07T12:45:27+00:00",
+ "host_name": "4ed4bacfe45d",
+ "executable": "./dl-model-resnet18-benchmark",
+ "num_cpus": 24,
+ "mhz_per_cpu": 5100,
+ "cpu_scaling_enabled": true,
+ "caches": [
+ {
+ "type": "Data",
+ "level": 1,
+ "size": 49152,
+ "num_sharing": 2
+ },
+ {
+ "type": "Instruction",
+ "level": 1,
+ "size": 32768,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 2,
+ "size": 1310720,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 3,
+ "size": 31457280,
+ "num_sharing": 24
+ }
+ ],
+ "load_avg": [2.47607,3.41699,5.17432],
+ "library_build_type": "release"
+ },
+ "benchmarks": [
+ {
+ "name": "DL_MODEL_Resnet18/Auto_Vectorization",
+ "family_index": 0,
+ "per_family_instance_index": 0,
+ "run_name": "DL_MODEL_Resnet18/Auto_Vectorization",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1,
+ "real_time": 7.6702358201146126e+02,
+ "cpu_time": 7.6673241800000005e+02,
+ "time_unit": "ms"
+ },
+ {
+ "name": "DL_MODEL_Resnet18/Buddy_Vectorization",
+ "family_index": 1,
+ "per_family_instance_index": 0,
+ "run_name": "DL_MODEL_Resnet18/Buddy_Vectorization",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1,
+ "real_time": 7.7053957059979439e+02,
+ "cpu_time": 7.7040162699999996e+02,
+ "time_unit": "ms"
+ }
+ ]
+}
diff --git a/test_result/deeplearning/dl-model-resnet18-benchmark.log b/test_result/deeplearning/dl-model-resnet18-benchmark.log
new file mode 100644
index 00000000..97e62844
--- /dev/null
+++ b/test_result/deeplearning/dl-model-resnet18-benchmark.log
@@ -0,0 +1,18 @@
+2025-09-07T12:45:27+00:00
+Running ./dl-model-resnet18-benchmark
+Run on (24 X 5100 MHz CPU s)
+CPU Caches:
+ L1 Data 48 KiB (x12)
+ L1 Instruction 32 KiB (x12)
+ L2 Unified 1280 KiB (x12)
+ L3 Unified 30720 KiB (x1)
+Load Average: 2.48, 3.42, 5.17
+***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
+--------------------------------------------------------------------------------
+Benchmark Time CPU Iterations
+--------------------------------------------------------------------------------
+DL_MODEL_Resnet18/Auto_Vectorization 767 ms 767 ms 1
+DL_MODEL_Resnet18/Buddy_Vectorization 771 ms 770 ms 1
+-----------------------------------------------------------
+Correctness Verification: [32mPASS[0m
+-----------------------------------------------------------
diff --git a/test_result/deeplearning/dl-model-tinyllama-benchmark.json b/test_result/deeplearning/dl-model-tinyllama-benchmark.json
new file mode 100644
index 00000000..3961b66f
--- /dev/null
+++ b/test_result/deeplearning/dl-model-tinyllama-benchmark.json
@@ -0,0 +1,82 @@
+{
+ "context": {
+ "date": "2025-09-07T12:35:22+00:00",
+ "host_name": "4ed4bacfe45d",
+ "executable": "./dl-model-tinyllama-benchmark",
+ "num_cpus": 24,
+ "mhz_per_cpu": 5100,
+ "cpu_scaling_enabled": true,
+ "caches": [
+ {
+ "type": "Data",
+ "level": 1,
+ "size": 49152,
+ "num_sharing": 2
+ },
+ {
+ "type": "Instruction",
+ "level": 1,
+ "size": 32768,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 2,
+ "size": 1310720,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 3,
+ "size": 31457280,
+ "num_sharing": 24
+ }
+ ],
+ "load_avg": [4.896,5.53271,6.99316],
+ "library_build_type": "release"
+ },
+ "benchmarks": [
+ {
+ "name": "DL_MODEL_TINYLLAMA/scalar",
+ "family_index": 0,
+ "per_family_instance_index": 0,
+ "run_name": "DL_MODEL_TINYLLAMA/scalar",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1,
+ "real_time": 1.7120061315596104e+05,
+ "cpu_time": 1.7119792047700004e+05,
+ "time_unit": "ms"
+ },
+ {
+ "name": "DL_MODEL_TINYLLAMA/matmul_opt",
+ "family_index": 1,
+ "per_family_instance_index": 0,
+ "run_name": "DL_MODEL_TINYLLAMA/matmul_opt",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1,
+ "real_time": 1.1143549453467131e+04,
+ "cpu_time": 1.1135273949000009e+04,
+ "time_unit": "ms"
+ },
+ {
+ "name": "DL_MODEL_TINYLLAMA/matmul_opt_omp",
+ "family_index": 2,
+ "per_family_instance_index": 0,
+ "run_name": "DL_MODEL_TINYLLAMA/matmul_opt_omp",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1,
+ "real_time": 8.3347530625760555e+03,
+ "cpu_time": 7.7325455960000227e+03,
+ "time_unit": "ms"
+ }
+ ]
+}
diff --git a/test_result/deeplearning/dl-model-tinyllama-benchmark.log b/test_result/deeplearning/dl-model-tinyllama-benchmark.log
new file mode 100644
index 00000000..b6f53ed8
--- /dev/null
+++ b/test_result/deeplearning/dl-model-tinyllama-benchmark.log
@@ -0,0 +1,19 @@
+2025-09-07T12:35:22+00:00
+Running ./dl-model-tinyllama-benchmark
+Run on (24 X 5100 MHz CPU s)
+CPU Caches:
+ L1 Data 48 KiB (x12)
+ L1 Instruction 32 KiB (x12)
+ L2 Unified 1280 KiB (x12)
+ L3 Unified 30720 KiB (x1)
+Load Average: 4.90, 5.53, 6.99
+***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
+----------------------------------------------------------------------------
+Benchmark Time CPU Iterations
+----------------------------------------------------------------------------
+DL_MODEL_TINYLLAMA/scalar 171201 ms 171198 ms 1
+DL_MODEL_TINYLLAMA/matmul_opt 11144 ms 11135 ms 1
+DL_MODEL_TINYLLAMA/matmul_opt_omp 8335 ms 7733 ms 1
+[34m---------- Verification ----------[0m
+matmul_opt [32mPASS[0m
+matmul_opt_omp [32mPASS[0m
diff --git a/test_result/deeplearning/dl-model-whisper-benchmark.json b/test_result/deeplearning/dl-model-whisper-benchmark.json
new file mode 100644
index 00000000..add9864c
--- /dev/null
+++ b/test_result/deeplearning/dl-model-whisper-benchmark.json
@@ -0,0 +1,68 @@
+{
+ "context": {
+ "date": "2025-09-07T12:41:50+00:00",
+ "host_name": "4ed4bacfe45d",
+ "executable": "./dl-model-whisper-benchmark",
+ "num_cpus": 24,
+ "mhz_per_cpu": 5100,
+ "cpu_scaling_enabled": true,
+ "caches": [
+ {
+ "type": "Data",
+ "level": 1,
+ "size": 49152,
+ "num_sharing": 2
+ },
+ {
+ "type": "Instruction",
+ "level": 1,
+ "size": 32768,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 2,
+ "size": 1310720,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 3,
+ "size": 31457280,
+ "num_sharing": 24
+ }
+ ],
+ "load_avg": [3.69971,4.12549,5.79736],
+ "library_build_type": "release"
+ },
+ "benchmarks": [
+ {
+ "name": "DL_MODEL_Whisper/Auto_Vectorization",
+ "family_index": 0,
+ "per_family_instance_index": 0,
+ "run_name": "DL_MODEL_Whisper/Auto_Vectorization",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1,
+ "real_time": 8.8294716205447912e+04,
+ "cpu_time": 8.8293256732999987e+04,
+ "time_unit": "ms"
+ },
+ {
+ "name": "DL_MODEL_Whisper/Buddy_Vectorization",
+ "family_index": 1,
+ "per_family_instance_index": 0,
+ "run_name": "DL_MODEL_Whisper/Buddy_Vectorization",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1,
+ "real_time": 4.0465919472277164e+04,
+ "cpu_time": 4.0458067526999999e+04,
+ "time_unit": "ms"
+ }
+ ]
+}
diff --git a/test_result/deeplearning/dl-model-whisper-benchmark.log b/test_result/deeplearning/dl-model-whisper-benchmark.log
new file mode 100644
index 00000000..8cfcecec
--- /dev/null
+++ b/test_result/deeplearning/dl-model-whisper-benchmark.log
@@ -0,0 +1,15 @@
+2025-09-07T12:41:50+00:00
+Running ./dl-model-whisper-benchmark
+Run on (24 X 5100 MHz CPU s)
+CPU Caches:
+ L1 Data 48 KiB (x12)
+ L1 Instruction 32 KiB (x12)
+ L2 Unified 1280 KiB (x12)
+ L3 Unified 30720 KiB (x1)
+Load Average: 3.70, 4.13, 5.80
+***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
+-------------------------------------------------------------------------------
+Benchmark Time CPU Iterations
+-------------------------------------------------------------------------------
+DL_MODEL_Whisper/Auto_Vectorization 88295 ms 88293 ms 1
+DL_MODEL_Whisper/Buddy_Vectorization 40466 ms 40458 ms 1
diff --git a/test_result/deeplearning/dl-op-linalg-arithaddf-benchmark.json b/test_result/deeplearning/dl-op-linalg-arithaddf-benchmark.json
new file mode 100644
index 00000000..bc957097
--- /dev/null
+++ b/test_result/deeplearning/dl-op-linalg-arithaddf-benchmark.json
@@ -0,0 +1,68 @@
+{
+ "context": {
+ "date": "2025-09-07T12:46:04+00:00",
+ "host_name": "4ed4bacfe45d",
+ "executable": "./dl-op-linalg-arithaddf-benchmark",
+ "num_cpus": 24,
+ "mhz_per_cpu": 5100,
+ "cpu_scaling_enabled": true,
+ "caches": [
+ {
+ "type": "Data",
+ "level": 1,
+ "size": 49152,
+ "num_sharing": 2
+ },
+ {
+ "type": "Instruction",
+ "level": 1,
+ "size": 32768,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 2,
+ "size": 1310720,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 3,
+ "size": 31457280,
+ "num_sharing": 24
+ }
+ ],
+ "load_avg": [2.34521,3.26758,5.04932],
+ "library_build_type": "release"
+ },
+ "benchmarks": [
+ {
+ "name": "BM_ADDF_SCALAR",
+ "family_index": 0,
+ "per_family_instance_index": 0,
+ "run_name": "BM_ADDF_SCALAR",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 22527,
+ "real_time": 3.1060902958688446e-02,
+ "cpu_time": 3.1059653438096500e-02,
+ "time_unit": "ms"
+ },
+ {
+ "name": "BM_ADDF_AutoVectorization",
+ "family_index": 1,
+ "per_family_instance_index": 0,
+ "run_name": "BM_ADDF_AutoVectorization",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 169988,
+ "real_time": 4.8817289258969946e-03,
+ "cpu_time": 4.8816731710473685e-03,
+ "time_unit": "ms"
+ }
+ ]
+}
diff --git a/test_result/deeplearning/dl-op-linalg-arithaddf-benchmark.log b/test_result/deeplearning/dl-op-linalg-arithaddf-benchmark.log
new file mode 100644
index 00000000..e69de29b
diff --git a/test_result/deeplearning/dl-op-linalg-arithdivf-benchmark.json b/test_result/deeplearning/dl-op-linalg-arithdivf-benchmark.json
new file mode 100644
index 00000000..629be93f
--- /dev/null
+++ b/test_result/deeplearning/dl-op-linalg-arithdivf-benchmark.json
@@ -0,0 +1,68 @@
+{
+ "context": {
+ "date": "2025-09-07T12:46:07+00:00",
+ "host_name": "4ed4bacfe45d",
+ "executable": "./dl-op-linalg-arithdivf-benchmark",
+ "num_cpus": 24,
+ "mhz_per_cpu": 5100,
+ "cpu_scaling_enabled": true,
+ "caches": [
+ {
+ "type": "Data",
+ "level": 1,
+ "size": 49152,
+ "num_sharing": 2
+ },
+ {
+ "type": "Instruction",
+ "level": 1,
+ "size": 32768,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 2,
+ "size": 1310720,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 3,
+ "size": 31457280,
+ "num_sharing": 24
+ }
+ ],
+ "load_avg": [2.34521,3.26758,5.04932],
+ "library_build_type": "release"
+ },
+ "benchmarks": [
+ {
+ "name": "BM_DIVF_SCALAR",
+ "family_index": 0,
+ "per_family_instance_index": 0,
+ "run_name": "BM_DIVF_SCALAR",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 22003,
+ "real_time": 3.2068282908939941e-02,
+ "cpu_time": 3.2067214334408942e-02,
+ "time_unit": "ms"
+ },
+ {
+ "name": "BM_DIVF_AutoVectorization",
+ "family_index": 1,
+ "per_family_instance_index": 0,
+ "run_name": "BM_DIVF_AutoVectorization",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 69823,
+ "real_time": 1.0602428337310130e-02,
+ "cpu_time": 1.0602179811809862e-02,
+ "time_unit": "ms"
+ }
+ ]
+}
diff --git a/test_result/deeplearning/dl-op-linalg-arithdivf-benchmark.log b/test_result/deeplearning/dl-op-linalg-arithdivf-benchmark.log
new file mode 100644
index 00000000..e69de29b
diff --git a/test_result/deeplearning/dl-op-linalg-arithmulf-benchmark.json b/test_result/deeplearning/dl-op-linalg-arithmulf-benchmark.json
new file mode 100644
index 00000000..a7052857
--- /dev/null
+++ b/test_result/deeplearning/dl-op-linalg-arithmulf-benchmark.json
@@ -0,0 +1,68 @@
+{
+ "context": {
+ "date": "2025-09-07T12:46:09+00:00",
+ "host_name": "4ed4bacfe45d",
+ "executable": "./dl-op-linalg-arithmulf-benchmark",
+ "num_cpus": 24,
+ "mhz_per_cpu": 5100,
+ "cpu_scaling_enabled": true,
+ "caches": [
+ {
+ "type": "Data",
+ "level": 1,
+ "size": 49152,
+ "num_sharing": 2
+ },
+ {
+ "type": "Instruction",
+ "level": 1,
+ "size": 32768,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 2,
+ "size": 1310720,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 3,
+ "size": 31457280,
+ "num_sharing": 24
+ }
+ ],
+ "load_avg": [2.39795,3.2627,5.03809],
+ "library_build_type": "release"
+ },
+ "benchmarks": [
+ {
+ "name": "BM_MULF_SCALAR",
+ "family_index": 0,
+ "per_family_instance_index": 0,
+ "run_name": "BM_MULF_SCALAR",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 22824,
+ "real_time": 3.0709744117373212e-02,
+ "cpu_time": 3.0708664607430772e-02,
+ "time_unit": "ms"
+ },
+ {
+ "name": "BM_MULF_AutoVectorization",
+ "family_index": 1,
+ "per_family_instance_index": 0,
+ "run_name": "BM_MULF_AutoVectorization",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 169993,
+ "real_time": 4.1166770421966290e-03,
+ "cpu_time": 4.1165691234344949e-03,
+ "time_unit": "ms"
+ }
+ ]
+}
diff --git a/test_result/deeplearning/dl-op-linalg-arithmulf-benchmark.log b/test_result/deeplearning/dl-op-linalg-arithmulf-benchmark.log
new file mode 100644
index 00000000..e69de29b
diff --git a/test_result/deeplearning/dl-op-linalg-arithnegf-benchmark.json b/test_result/deeplearning/dl-op-linalg-arithnegf-benchmark.json
new file mode 100644
index 00000000..282e1318
--- /dev/null
+++ b/test_result/deeplearning/dl-op-linalg-arithnegf-benchmark.json
@@ -0,0 +1,68 @@
+{
+ "context": {
+ "date": "2025-09-07T12:46:11+00:00",
+ "host_name": "4ed4bacfe45d",
+ "executable": "./dl-op-linalg-arithnegf-benchmark",
+ "num_cpus": 24,
+ "mhz_per_cpu": 5100,
+ "cpu_scaling_enabled": true,
+ "caches": [
+ {
+ "type": "Data",
+ "level": 1,
+ "size": 49152,
+ "num_sharing": 2
+ },
+ {
+ "type": "Instruction",
+ "level": 1,
+ "size": 32768,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 2,
+ "size": 1310720,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 3,
+ "size": 31457280,
+ "num_sharing": 24
+ }
+ ],
+ "load_avg": [2.39795,3.2627,5.03809],
+ "library_build_type": "release"
+ },
+ "benchmarks": [
+ {
+ "name": "BM_NEGF_SCALAR",
+ "family_index": 0,
+ "per_family_instance_index": 0,
+ "run_name": "BM_NEGF_SCALAR",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 29588,
+ "real_time": 2.3588028378715157e-02,
+ "cpu_time": 2.3587252737596327e-02,
+ "time_unit": "ms"
+ },
+ {
+ "name": "BM_NEGF_AutoVectorization",
+ "family_index": 1,
+ "per_family_instance_index": 0,
+ "run_name": "BM_NEGF_AutoVectorization",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 237464,
+ "real_time": 2.9502898712950253e-03,
+ "cpu_time": 2.9501475507866456e-03,
+ "time_unit": "ms"
+ }
+ ]
+}
diff --git a/test_result/deeplearning/dl-op-linalg-arithnegf-benchmark.log b/test_result/deeplearning/dl-op-linalg-arithnegf-benchmark.log
new file mode 100644
index 00000000..e69de29b
diff --git a/test_result/deeplearning/dl-op-linalg-arithsubf-benchmark.json b/test_result/deeplearning/dl-op-linalg-arithsubf-benchmark.json
new file mode 100644
index 00000000..4660fe60
--- /dev/null
+++ b/test_result/deeplearning/dl-op-linalg-arithsubf-benchmark.json
@@ -0,0 +1,68 @@
+{
+ "context": {
+ "date": "2025-09-07T12:46:13+00:00",
+ "host_name": "4ed4bacfe45d",
+ "executable": "./dl-op-linalg-arithsubf-benchmark",
+ "num_cpus": 24,
+ "mhz_per_cpu": 5100,
+ "cpu_scaling_enabled": true,
+ "caches": [
+ {
+ "type": "Data",
+ "level": 1,
+ "size": 49152,
+ "num_sharing": 2
+ },
+ {
+ "type": "Instruction",
+ "level": 1,
+ "size": 32768,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 2,
+ "size": 1310720,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 3,
+ "size": 31457280,
+ "num_sharing": 24
+ }
+ ],
+ "load_avg": [2.39795,3.2627,5.03809],
+ "library_build_type": "release"
+ },
+ "benchmarks": [
+ {
+ "name": "BM_SUBF_SCALAR",
+ "family_index": 0,
+ "per_family_instance_index": 0,
+ "run_name": "BM_SUBF_SCALAR",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 22687,
+ "real_time": 3.0744381588195889e-02,
+ "cpu_time": 3.0743076916295679e-02,
+ "time_unit": "ms"
+ },
+ {
+ "name": "BM_SUBF_AutoVectorization",
+ "family_index": 1,
+ "per_family_instance_index": 0,
+ "run_name": "BM_SUBF_AutoVectorization",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 170328,
+ "real_time": 4.1076257294038214e-03,
+ "cpu_time": 4.1075212472406180e-03,
+ "time_unit": "ms"
+ }
+ ]
+}
diff --git a/test_result/deeplearning/dl-op-linalg-arithsubf-benchmark.log b/test_result/deeplearning/dl-op-linalg-arithsubf-benchmark.log
new file mode 100644
index 00000000..e69de29b
diff --git a/test_result/deeplearning/dl-op-linalg-batch-matmul-benchmark.json b/test_result/deeplearning/dl-op-linalg-batch-matmul-benchmark.json
new file mode 100644
index 00000000..a25979f5
--- /dev/null
+++ b/test_result/deeplearning/dl-op-linalg-batch-matmul-benchmark.json
@@ -0,0 +1,138 @@
+{
+ "context": {
+ "date": "2025-09-07T12:45:54+00:00",
+ "host_name": "4ed4bacfe45d",
+ "executable": "./dl-op-linalg-batch-matmul-benchmark",
+ "num_cpus": 24,
+ "mhz_per_cpu": 5100,
+ "cpu_scaling_enabled": true,
+ "caches": [
+ {
+ "type": "Data",
+ "level": 1,
+ "size": 49152,
+ "num_sharing": 2
+ },
+ {
+ "type": "Instruction",
+ "level": 1,
+ "size": 32768,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 2,
+ "size": 1310720,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 3,
+ "size": 31457280,
+ "num_sharing": 24
+ }
+ ],
+ "load_avg": [2.40869,3.31104,5.08252],
+ "library_build_type": "release"
+ },
+ "benchmarks": [
+ {
+ "name": "DL_OPS_BATCH_MATMUL/Scalar/iterations:1",
+ "family_index": 0,
+ "per_family_instance_index": 0,
+ "run_name": "DL_OPS_BATCH_MATMUL/Scalar/iterations:1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1,
+ "real_time": 3.6347736530005932e+03,
+ "cpu_time": 3.6346553479999998e+03,
+ "time_unit": "ms"
+ },
+ {
+ "name": "DL_OPS_BATCH_MATMUL/AutoVectorization/iterations:1",
+ "family_index": 1,
+ "per_family_instance_index": 0,
+ "run_name": "DL_OPS_BATCH_MATMUL/AutoVectorization/iterations:1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1,
+ "real_time": 1.0061066299676895e+03,
+ "cpu_time": 1.0060745660000001e+03,
+ "time_unit": "ms"
+ },
+ {
+ "name": "DL_OPS_BATCH_MATMUL/Vectorization/iterations:1",
+ "family_index": 2,
+ "per_family_instance_index": 0,
+ "run_name": "DL_OPS_BATCH_MATMUL/Vectorization/iterations:1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1,
+ "real_time": 1.9591017067432404e+02,
+ "cpu_time": 1.9590338600000035e+02,
+ "time_unit": "ms"
+ },
+ {
+ "name": "DL_OPS_BATCH_MATMUL/Tile/iterations:1",
+ "family_index": 3,
+ "per_family_instance_index": 0,
+ "run_name": "DL_OPS_BATCH_MATMUL/Tile/iterations:1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1,
+ "real_time": 1.1179352924227715e+02,
+ "cpu_time": 1.1179250600000046e+02,
+ "time_unit": "ms"
+ },
+ {
+ "name": "DL_OPS_BATCH_MATMUL/SCF/iterations:1",
+ "family_index": 4,
+ "per_family_instance_index": 0,
+ "run_name": "DL_OPS_BATCH_MATMUL/SCF/iterations:1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1,
+ "real_time": 1.2078800052404404e+02,
+ "cpu_time": 1.2078363899999988e+02,
+ "time_unit": "ms"
+ },
+ {
+ "name": "DL_OPS_BATCH_MATMUL/BROADCAST/iterations:1",
+ "family_index": 5,
+ "per_family_instance_index": 0,
+ "run_name": "DL_OPS_BATCH_MATMUL/BROADCAST/iterations:1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1,
+ "real_time": 3.6683125793933868e+02,
+ "cpu_time": 3.6682773099999986e+02,
+ "time_unit": "ms"
+ },
+ {
+ "name": "DL_OPS_BATCH_MATMUL/BROADCAST_OMP/iterations:1",
+ "family_index": 6,
+ "per_family_instance_index": 0,
+ "run_name": "DL_OPS_BATCH_MATMUL/BROADCAST_OMP/iterations:1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1,
+ "real_time": 1.1074854433536530e+02,
+ "cpu_time": 2.2687625000000544e+01,
+ "time_unit": "ms"
+ }
+ ]
+}
diff --git a/test_result/deeplearning/dl-op-linalg-batch-matmul-benchmark.log b/test_result/deeplearning/dl-op-linalg-batch-matmul-benchmark.log
new file mode 100644
index 00000000..8d059c82
--- /dev/null
+++ b/test_result/deeplearning/dl-op-linalg-batch-matmul-benchmark.log
@@ -0,0 +1,25 @@
+2025-09-07T12:45:54+00:00
+Running ./dl-op-linalg-batch-matmul-benchmark
+Run on (24 X 5100 MHz CPU s)
+CPU Caches:
+ L1 Data 48 KiB (x12)
+ L1 Instruction 32 KiB (x12)
+ L2 Unified 1280 KiB (x12)
+ L3 Unified 30720 KiB (x1)
+Load Average: 2.41, 3.31, 5.08
+***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
+---------------------------------------------------------------------------------------------
+Benchmark Time CPU Iterations
+---------------------------------------------------------------------------------------------
+DL_OPS_BATCH_MATMUL/Scalar/iterations:1 3635 ms 3635 ms 1
+DL_OPS_BATCH_MATMUL/AutoVectorization/iterations:1 1006 ms 1006 ms 1
+DL_OPS_BATCH_MATMUL/Vectorization/iterations:1 196 ms 196 ms 1
+DL_OPS_BATCH_MATMUL/Tile/iterations:1 112 ms 112 ms 1
+DL_OPS_BATCH_MATMUL/SCF/iterations:1 121 ms 121 ms 1
+DL_OPS_BATCH_MATMUL/BROADCAST/iterations:1 367 ms 367 ms 1
+DL_OPS_BATCH_MATMUL/BROADCAST_OMP/iterations:1 111 ms 22.7 ms 1
+[34m---------- Verification ----------[0m
+Tile [32mPASS[0m
+SCF [32mPASS[0m
+BROADCAST [32mPASS[0m
+BROADCAST_OMP [32mPASS[0m
diff --git a/test_result/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.json b/test_result/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.json
new file mode 100644
index 00000000..3a979642
--- /dev/null
+++ b/test_result/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.json
@@ -0,0 +1,68 @@
+{
+ "context": {
+ "date": "2025-09-07T12:45:47+00:00",
+ "host_name": "4ed4bacfe45d",
+ "executable": "./dl-op-linalg-conv2d-nchw-fchw-benchmark",
+ "num_cpus": 24,
+ "mhz_per_cpu": 5100,
+ "cpu_scaling_enabled": true,
+ "caches": [
+ {
+ "type": "Data",
+ "level": 1,
+ "size": 49152,
+ "num_sharing": 2
+ },
+ {
+ "type": "Instruction",
+ "level": 1,
+ "size": 32768,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 2,
+ "size": 1310720,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 3,
+ "size": 31457280,
+ "num_sharing": 24
+ }
+ ],
+ "load_avg": [2.4834,3.35645,5.1167],
+ "library_build_type": "release"
+ },
+ "benchmarks": [
+ {
+ "name": "BM_Conv2DNchwFchw_SCALAR",
+ "family_index": 0,
+ "per_family_instance_index": 0,
+ "run_name": "BM_Conv2DNchwFchw_SCALAR",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2,
+ "real_time": 2.9066542163491249e+02,
+ "cpu_time": 2.9065969050000001e+02,
+ "time_unit": "ms"
+ },
+ {
+ "name": "BM_Conv2DNchwFchw_Im2col",
+ "family_index": 1,
+ "per_family_instance_index": 0,
+ "run_name": "BM_Conv2DNchwFchw_Im2col",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 72,
+ "real_time": 8.5637474743028488e+00,
+ "cpu_time": 8.5636718611111107e+00,
+ "time_unit": "ms"
+ }
+ ]
+}
diff --git a/test_result/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.log b/test_result/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.log
new file mode 100644
index 00000000..e69de29b
diff --git a/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json b/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json
new file mode 100644
index 00000000..595bcecd
--- /dev/null
+++ b/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json
@@ -0,0 +1,96 @@
+{
+ "context": {
+ "date": "2025-09-07T12:45:51+00:00",
+ "host_name": "4ed4bacfe45d",
+ "executable": "./dl-op-linalg-conv2d-nhwc-fhwc-benchmark",
+ "num_cpus": 24,
+ "mhz_per_cpu": 5100,
+ "cpu_scaling_enabled": true,
+ "caches": [
+ {
+ "type": "Data",
+ "level": 1,
+ "size": 49152,
+ "num_sharing": 2
+ },
+ {
+ "type": "Instruction",
+ "level": 1,
+ "size": 32768,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 2,
+ "size": 1310720,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 3,
+ "size": 31457280,
+ "num_sharing": 24
+ }
+ ],
+ "load_avg": [2.44434,3.3335,5.09961],
+ "library_build_type": "release"
+ },
+ "benchmarks": [
+ {
+ "name": "DL_OPS_CONV_2D_NHWC_FHWC/scalar/iterations:5",
+ "family_index": 0,
+ "per_family_instance_index": 0,
+ "run_name": "DL_OPS_CONV_2D_NHWC_FHWC/scalar/iterations:5",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 5,
+ "real_time": 7.3888380080461502e+01,
+ "cpu_time": 7.3885279400000002e+01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "DL_OPS_CONV_2D_NHWC_FHWC/auto_vectorization/iterations:5",
+ "family_index": 1,
+ "per_family_instance_index": 0,
+ "run_name": "DL_OPS_CONV_2D_NHWC_FHWC/auto_vectorization/iterations:5",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 5,
+ "real_time": 9.7335599362850189e+00,
+ "cpu_time": 9.7335111999999988e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "DL_OPS_CONV_2D_NHWC_FHWC/vectorization/iterations:5",
+ "family_index": 2,
+ "per_family_instance_index": 0,
+ "run_name": "DL_OPS_CONV_2D_NHWC_FHWC/vectorization/iterations:5",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 5,
+ "real_time": 1.8217429518699646e+00,
+ "cpu_time": 1.8217338000000027e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "DL_OPS_CONV_2D_NHWC_FHWC/vec_tile/iterations:5",
+ "family_index": 3,
+ "per_family_instance_index": 0,
+ "run_name": "DL_OPS_CONV_2D_NHWC_FHWC/vec_tile/iterations:5",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 5,
+ "real_time": 1.7791815102100372e+00,
+ "cpu_time": 1.7791528000000056e+00,
+ "time_unit": "ms"
+ }
+ ]
+}
diff --git a/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.log b/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.log
new file mode 100644
index 00000000..e69de29b
diff --git a/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json b/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json
new file mode 100644
index 00000000..17a679c1
--- /dev/null
+++ b/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json
@@ -0,0 +1,68 @@
+{
+ "context": {
+ "date": "2025-09-07T12:45:49+00:00",
+ "host_name": "4ed4bacfe45d",
+ "executable": "./dl-op-linalg-conv2d-nhwc-hwcf-benchmark",
+ "num_cpus": 24,
+ "mhz_per_cpu": 5100,
+ "cpu_scaling_enabled": true,
+ "caches": [
+ {
+ "type": "Data",
+ "level": 1,
+ "size": 49152,
+ "num_sharing": 2
+ },
+ {
+ "type": "Instruction",
+ "level": 1,
+ "size": 32768,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 2,
+ "size": 1310720,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 3,
+ "size": 31457280,
+ "num_sharing": 24
+ }
+ ],
+ "load_avg": [2.44434,3.3335,5.09961],
+ "library_build_type": "release"
+ },
+ "benchmarks": [
+ {
+ "name": "BM_CONV_2D_NHWC_HWCF_SCALAR",
+ "family_index": 0,
+ "per_family_instance_index": 0,
+ "run_name": "BM_CONV_2D_NHWC_HWCF_SCALAR",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 21,
+ "real_time": 3.3404812571548277e+01,
+ "cpu_time": 3.3404357952380956e+01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "BM_CONV_2D_NHWC_HWCF_AutoVectorization",
+ "family_index": 1,
+ "per_family_instance_index": 0,
+ "run_name": "BM_CONV_2D_NHWC_HWCF_AutoVectorization",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 110,
+ "real_time": 6.2886948273940524e+00,
+ "cpu_time": 6.2886236181818180e+00,
+ "time_unit": "ms"
+ }
+ ]
+}
diff --git a/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.log b/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.log
new file mode 100644
index 00000000..e69de29b
diff --git a/test_result/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json b/test_result/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json
new file mode 100644
index 00000000..b1b91623
--- /dev/null
+++ b/test_result/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json
@@ -0,0 +1,82 @@
+{
+ "context": {
+ "date": "2025-09-07T12:45:52+00:00",
+ "host_name": "4ed4bacfe45d",
+ "executable": "./dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark",
+ "num_cpus": 24,
+ "mhz_per_cpu": 5100,
+ "cpu_scaling_enabled": true,
+ "caches": [
+ {
+ "type": "Data",
+ "level": 1,
+ "size": 49152,
+ "num_sharing": 2
+ },
+ {
+ "type": "Instruction",
+ "level": 1,
+ "size": 32768,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 2,
+ "size": 1310720,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 3,
+ "size": 31457280,
+ "num_sharing": 24
+ }
+ ],
+ "load_avg": [2.44434,3.3335,5.09961],
+ "library_build_type": "release"
+ },
+ "benchmarks": [
+ {
+ "name": "DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/scalar/iterations:5",
+ "family_index": 0,
+ "per_family_instance_index": 0,
+ "run_name": "DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/scalar/iterations:5",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 5,
+ "real_time": 4.3137572705745697e+00,
+ "cpu_time": 4.3121678000000001e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/auto_vectorization/iterations:5",
+ "family_index": 1,
+ "per_family_instance_index": 0,
+ "run_name": "DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/auto_vectorization/iterations:5",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 5,
+ "real_time": 1.7169959843158722e+00,
+ "cpu_time": 1.7169760000000007e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/vectorization/iterations:5",
+ "family_index": 2,
+ "per_family_instance_index": 0,
+ "run_name": "DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/vectorization/iterations:5",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 5,
+ "real_time": 1.2791678309440613e-01,
+ "cpu_time": 1.2791580000000025e-01,
+ "time_unit": "ms"
+ }
+ ]
+}
diff --git a/test_result/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.log b/test_result/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.log
new file mode 100644
index 00000000..5e616453
--- /dev/null
+++ b/test_result/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.log
@@ -0,0 +1 @@
+qemu-riscv64-static: Could not open '/lib/ld-linux-riscv64-lp64d.so.1': No such file or directory
diff --git a/test_result/deeplearning/dl-op-linalg-mathexp-benchmark.json b/test_result/deeplearning/dl-op-linalg-mathexp-benchmark.json
new file mode 100644
index 00000000..1cc5b9de
--- /dev/null
+++ b/test_result/deeplearning/dl-op-linalg-mathexp-benchmark.json
@@ -0,0 +1,68 @@
+{
+ "context": {
+ "date": "2025-09-07T12:46:19+00:00",
+ "host_name": "4ed4bacfe45d",
+ "executable": "./dl-op-linalg-mathexp-benchmark",
+ "num_cpus": 24,
+ "mhz_per_cpu": 5100,
+ "cpu_scaling_enabled": true,
+ "caches": [
+ {
+ "type": "Data",
+ "level": 1,
+ "size": 49152,
+ "num_sharing": 2
+ },
+ {
+ "type": "Instruction",
+ "level": 1,
+ "size": 32768,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 2,
+ "size": 1310720,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 3,
+ "size": 31457280,
+ "num_sharing": 24
+ }
+ ],
+ "load_avg": [2.49072,3.25391,5.01562],
+ "library_build_type": "release"
+ },
+ "benchmarks": [
+ {
+ "name": "BM_EXP_SCALAR",
+ "family_index": 0,
+ "per_family_instance_index": 0,
+ "run_name": "BM_EXP_SCALAR",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 14801,
+ "real_time": 4.7153966502236092e-02,
+ "cpu_time": 4.7153170799270318e-02,
+ "time_unit": "ms"
+ },
+ {
+ "name": "BM_EXP_AutoVectorization",
+ "family_index": 1,
+ "per_family_instance_index": 0,
+ "run_name": "BM_EXP_AutoVectorization",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 21304,
+ "real_time": 3.2612131513344626e-02,
+ "cpu_time": 3.2610319517461503e-02,
+ "time_unit": "ms"
+ }
+ ]
+}
diff --git a/test_result/deeplearning/dl-op-linalg-mathexp-benchmark.log b/test_result/deeplearning/dl-op-linalg-mathexp-benchmark.log
new file mode 100644
index 00000000..e69de29b
diff --git a/test_result/deeplearning/dl-op-linalg-mathfpow-benchmark.json b/test_result/deeplearning/dl-op-linalg-mathfpow-benchmark.json
new file mode 100644
index 00000000..0b5c9ab3
--- /dev/null
+++ b/test_result/deeplearning/dl-op-linalg-mathfpow-benchmark.json
@@ -0,0 +1,68 @@
+{
+ "context": {
+ "date": "2025-09-07T12:46:15+00:00",
+ "host_name": "4ed4bacfe45d",
+ "executable": "./dl-op-linalg-mathfpow-benchmark",
+ "num_cpus": 24,
+ "mhz_per_cpu": 5100,
+ "cpu_scaling_enabled": true,
+ "caches": [
+ {
+ "type": "Data",
+ "level": 1,
+ "size": 49152,
+ "num_sharing": 2
+ },
+ {
+ "type": "Instruction",
+ "level": 1,
+ "size": 32768,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 2,
+ "size": 1310720,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 3,
+ "size": 31457280,
+ "num_sharing": 24
+ }
+ ],
+ "load_avg": [2.44629,3.2583,5.02686],
+ "library_build_type": "release"
+ },
+ "benchmarks": [
+ {
+ "name": "BM_FPOW_SCALAR",
+ "family_index": 0,
+ "per_family_instance_index": 0,
+ "run_name": "BM_FPOW_SCALAR",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 8174,
+ "real_time": 8.5793241880358306e-02,
+ "cpu_time": 8.5789674944947408e-02,
+ "time_unit": "ms"
+ },
+ {
+ "name": "BM_FPOW_AutoVectorization",
+ "family_index": 1,
+ "per_family_instance_index": 0,
+ "run_name": "BM_FPOW_AutoVectorization",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 11919,
+ "real_time": 5.8559470965724454e-02,
+ "cpu_time": 5.8556822887826147e-02,
+ "time_unit": "ms"
+ }
+ ]
+}
diff --git a/test_result/deeplearning/dl-op-linalg-mathfpow-benchmark.log b/test_result/deeplearning/dl-op-linalg-mathfpow-benchmark.log
new file mode 100644
index 00000000..e69de29b
diff --git a/test_result/deeplearning/dl-op-linalg-mathrsqrt-benchmark.json b/test_result/deeplearning/dl-op-linalg-mathrsqrt-benchmark.json
new file mode 100644
index 00000000..e8085b93
--- /dev/null
+++ b/test_result/deeplearning/dl-op-linalg-mathrsqrt-benchmark.json
@@ -0,0 +1,68 @@
+{
+ "context": {
+ "date": "2025-09-07T12:46:17+00:00",
+ "host_name": "4ed4bacfe45d",
+ "executable": "./dl-op-linalg-mathrsqrt-benchmark",
+ "num_cpus": 24,
+ "mhz_per_cpu": 5100,
+ "cpu_scaling_enabled": true,
+ "caches": [
+ {
+ "type": "Data",
+ "level": 1,
+ "size": 49152,
+ "num_sharing": 2
+ },
+ {
+ "type": "Instruction",
+ "level": 1,
+ "size": 32768,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 2,
+ "size": 1310720,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 3,
+ "size": 31457280,
+ "num_sharing": 24
+ }
+ ],
+ "load_avg": [2.44629,3.2583,5.02686],
+ "library_build_type": "release"
+ },
+ "benchmarks": [
+ {
+ "name": "BM_RSQRT_SCALAR",
+ "family_index": 0,
+ "per_family_instance_index": 0,
+ "run_name": "BM_RSQRT_SCALAR",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 9351,
+ "real_time": 7.4849401154169840e-02,
+ "cpu_time": 7.4846361458667521e-02,
+ "time_unit": "ms"
+ },
+ {
+ "name": "BM_RSQRT_AutoVectorization",
+ "family_index": 1,
+ "per_family_instance_index": 0,
+ "run_name": "BM_RSQRT_AutoVectorization",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 155807,
+ "real_time": 4.4754421888014168e-03,
+ "cpu_time": 4.4753065651735808e-03,
+ "time_unit": "ms"
+ }
+ ]
+}
diff --git a/test_result/deeplearning/dl-op-linalg-mathrsqrt-benchmark.log b/test_result/deeplearning/dl-op-linalg-mathrsqrt-benchmark.log
new file mode 100644
index 00000000..e69de29b
diff --git a/test_result/deeplearning/dl-op-linalg-matmul-benchmark.json b/test_result/deeplearning/dl-op-linalg-matmul-benchmark.json
new file mode 100644
index 00000000..2cc316a3
--- /dev/null
+++ b/test_result/deeplearning/dl-op-linalg-matmul-benchmark.json
@@ -0,0 +1,110 @@
+{
+ "context": {
+ "date": "2025-09-07T12:45:36+00:00",
+ "host_name": "4ed4bacfe45d",
+ "executable": "./dl-op-linalg-matmul-benchmark",
+ "num_cpus": 24,
+ "mhz_per_cpu": 5100,
+ "cpu_scaling_enabled": true,
+ "caches": [
+ {
+ "type": "Data",
+ "level": 1,
+ "size": 49152,
+ "num_sharing": 2
+ },
+ {
+ "type": "Instruction",
+ "level": 1,
+ "size": 32768,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 2,
+ "size": 1310720,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 3,
+ "size": 31457280,
+ "num_sharing": 24
+ }
+ ],
+ "load_avg": [2.47656,3.38623,5.14551],
+ "library_build_type": "release"
+ },
+ "benchmarks": [
+ {
+ "name": "DL_OPS_MATMUL/scalar_O0/iterations:1",
+ "family_index": 0,
+ "per_family_instance_index": 0,
+ "run_name": "DL_OPS_MATMUL/scalar_O0/iterations:1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1,
+ "real_time": 4.0999811291694641e+03,
+ "cpu_time": 4.0998556719999997e+03,
+ "time_unit": "ms"
+ },
+ {
+ "name": "DL_OPS_MATMUL/scalar_O3/iterations:1",
+ "family_index": 1,
+ "per_family_instance_index": 0,
+ "run_name": "DL_OPS_MATMUL/scalar_O3/iterations:1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1,
+ "real_time": 3.5827754400670528e+03,
+ "cpu_time": 3.5826578559999998e+03,
+ "time_unit": "ms"
+ },
+ {
+ "name": "DL_OPS_MATMUL/tile/iterations:1",
+ "family_index": 2,
+ "per_family_instance_index": 0,
+ "run_name": "DL_OPS_MATMUL/tile/iterations:1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1,
+ "real_time": 1.0819802060723305e+02,
+ "cpu_time": 1.0819740099999997e+02,
+ "time_unit": "ms"
+ },
+ {
+ "name": "DL_OPS_MATMUL/vec/iterations:1",
+ "family_index": 3,
+ "per_family_instance_index": 0,
+ "run_name": "DL_OPS_MATMUL/vec/iterations:1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1,
+ "real_time": 6.1437729746103287e+01,
+ "cpu_time": 6.1437198000000137e+01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "DL_OPS_MATMUL/vec_omp/iterations:1",
+ "family_index": 4,
+ "per_family_instance_index": 0,
+ "run_name": "DL_OPS_MATMUL/vec_omp/iterations:1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1,
+ "real_time": 1.8467400223016739e+01,
+ "cpu_time": 7.8750589999998510e+00,
+ "time_unit": "ms"
+ }
+ ]
+}
diff --git a/test_result/deeplearning/dl-op-linalg-matmul-benchmark.log b/test_result/deeplearning/dl-op-linalg-matmul-benchmark.log
new file mode 100644
index 00000000..b46496bd
--- /dev/null
+++ b/test_result/deeplearning/dl-op-linalg-matmul-benchmark.log
@@ -0,0 +1,22 @@
+2025-09-07T12:45:36+00:00
+Running ./dl-op-linalg-matmul-benchmark
+Run on (24 X 5100 MHz CPU s)
+CPU Caches:
+ L1 Data 48 KiB (x12)
+ L1 Instruction 32 KiB (x12)
+ L2 Unified 1280 KiB (x12)
+ L3 Unified 30720 KiB (x1)
+Load Average: 2.48, 3.39, 5.15
+***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
+-------------------------------------------------------------------------------
+Benchmark Time CPU Iterations
+-------------------------------------------------------------------------------
+DL_OPS_MATMUL/scalar_O0/iterations:1 4100 ms 4100 ms 1
+DL_OPS_MATMUL/scalar_O3/iterations:1 3583 ms 3583 ms 1
+DL_OPS_MATMUL/tile/iterations:1 108 ms 108 ms 1
+DL_OPS_MATMUL/vec/iterations:1 61.4 ms 61.4 ms 1
+DL_OPS_MATMUL/vec_omp/iterations:1 18.5 ms 7.88 ms 1
+[34m---------- Verification ----------[0m
+tile [32mPASS[0m
+vec [32mPASS[0m
+vec_omp [32mPASS[0m
diff --git a/test_result/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.json b/test_result/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.json
new file mode 100644
index 00000000..e0b2bec9
--- /dev/null
+++ b/test_result/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.json
@@ -0,0 +1,68 @@
+{
+ "context": {
+ "date": "2025-09-07T12:45:52+00:00",
+ "host_name": "4ed4bacfe45d",
+ "executable": "./dl-op-linalg-pooling-nhwc-sum-benchmark",
+ "num_cpus": 24,
+ "mhz_per_cpu": 5100,
+ "cpu_scaling_enabled": true,
+ "caches": [
+ {
+ "type": "Data",
+ "level": 1,
+ "size": 49152,
+ "num_sharing": 2
+ },
+ {
+ "type": "Instruction",
+ "level": 1,
+ "size": 32768,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 2,
+ "size": 1310720,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 3,
+ "size": 31457280,
+ "num_sharing": 24
+ }
+ ],
+ "load_avg": [2.44434,3.3335,5.09961],
+ "library_build_type": "release"
+ },
+ "benchmarks": [
+ {
+ "name": "BM_POOLING_NHWC_SUM_SCALAR",
+ "family_index": 0,
+ "per_family_instance_index": 0,
+ "run_name": "BM_POOLING_NHWC_SUM_SCALAR",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2922,
+ "real_time": 2.4033439441913615e-01,
+ "cpu_time": 2.4032710540725533e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "BM_POOLING_NHWC_SUM_AutoVectorization",
+ "family_index": 1,
+ "per_family_instance_index": 0,
+ "run_name": "BM_POOLING_NHWC_SUM_AutoVectorization",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 16330,
+ "real_time": 4.2958081279586446e-02,
+ "cpu_time": 4.2957538089406000e-02,
+ "time_unit": "ms"
+ }
+ ]
+}
diff --git a/test_result/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.log b/test_result/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.log
new file mode 100644
index 00000000..e69de29b
diff --git a/test_result/deeplearning/dl-op-linalg-reduceaddf-benchmark.json b/test_result/deeplearning/dl-op-linalg-reduceaddf-benchmark.json
new file mode 100644
index 00000000..82f932ac
--- /dev/null
+++ b/test_result/deeplearning/dl-op-linalg-reduceaddf-benchmark.json
@@ -0,0 +1,38 @@
+{
+ "context": {
+ "date": "2025-09-07T12:46:21+00:00",
+ "host_name": "4ed4bacfe45d",
+ "executable": "./dl-op-linalg-reduceaddf-benchmark",
+ "num_cpus": 24,
+ "mhz_per_cpu": 5100,
+ "cpu_scaling_enabled": true,
+ "caches": [
+ {
+ "type": "Data",
+ "level": 1,
+ "size": 49152,
+ "num_sharing": 2
+ },
+ {
+ "type": "Instruction",
+ "level": 1,
+ "size": 32768,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 2,
+ "size": 1310720,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 3,
+ "size": 31457280,
+ "num_sharing": 24
+ }
+ ],
+ "load_avg": [2.49072,3.25391,5.01562],
+ "library_build_type": "release"
+ },
+ "benchmarks": [
diff --git a/test_result/deeplearning/dl-op-linalg-reduceaddf-benchmark.log b/test_result/deeplearning/dl-op-linalg-reduceaddf-benchmark.log
new file mode 100644
index 00000000..e69de29b
diff --git a/test_result/deeplearning/dl-op-linalg-reducemaxf-benchmark.json b/test_result/deeplearning/dl-op-linalg-reducemaxf-benchmark.json
new file mode 100644
index 00000000..fdb7b390
--- /dev/null
+++ b/test_result/deeplearning/dl-op-linalg-reducemaxf-benchmark.json
@@ -0,0 +1,38 @@
+{
+ "context": {
+ "date": "2025-09-07T12:46:21+00:00",
+ "host_name": "4ed4bacfe45d",
+ "executable": "./dl-op-linalg-reducemaxf-benchmark",
+ "num_cpus": 24,
+ "mhz_per_cpu": 5100,
+ "cpu_scaling_enabled": true,
+ "caches": [
+ {
+ "type": "Data",
+ "level": 1,
+ "size": 49152,
+ "num_sharing": 2
+ },
+ {
+ "type": "Instruction",
+ "level": 1,
+ "size": 32768,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 2,
+ "size": 1310720,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 3,
+ "size": 31457280,
+ "num_sharing": 24
+ }
+ ],
+ "load_avg": [2.49072,3.25391,5.01562],
+ "library_build_type": "release"
+ },
+ "benchmarks": [
diff --git a/test_result/deeplearning/dl-op-linalg-reducemaxf-benchmark.log b/test_result/deeplearning/dl-op-linalg-reducemaxf-benchmark.log
new file mode 100644
index 00000000..e69de29b
diff --git a/test_result/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.json b/test_result/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.json
new file mode 100644
index 00000000..d57079ca
--- /dev/null
+++ b/test_result/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.json
@@ -0,0 +1,68 @@
+{
+ "context": {
+ "date": "2025-09-07T12:46:21+00:00",
+ "host_name": "4ed4bacfe45d",
+ "executable": "./dl-op-linalg-softmax-exp-sum-div-benchmark",
+ "num_cpus": 24,
+ "mhz_per_cpu": 5100,
+ "cpu_scaling_enabled": true,
+ "caches": [
+ {
+ "type": "Data",
+ "level": 1,
+ "size": 49152,
+ "num_sharing": 2
+ },
+ {
+ "type": "Instruction",
+ "level": 1,
+ "size": 32768,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 2,
+ "size": 1310720,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 3,
+ "size": 31457280,
+ "num_sharing": 24
+ }
+ ],
+ "load_avg": [2.49072,3.25391,5.01562],
+ "library_build_type": "release"
+ },
+ "benchmarks": [
+ {
+ "name": "BM_SOFTMAXEXPSUMDIV_SCALAR",
+ "family_index": 0,
+ "per_family_instance_index": 0,
+ "run_name": "BM_SOFTMAXEXPSUMDIV_SCALAR",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 120007,
+ "real_time": 5.8092399238988792e-03,
+ "cpu_time": 5.8089997500145821e-03,
+ "time_unit": "ms"
+ },
+ {
+ "name": "BM_SOFTMAXEXPSUMDIV_AutoVectorization",
+ "family_index": 1,
+ "per_family_instance_index": 0,
+ "run_name": "BM_SOFTMAXEXPSUMDIV_AutoVectorization",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 176914,
+ "real_time": 3.9636845145346869e-03,
+ "cpu_time": 3.9634847383474463e-03,
+ "time_unit": "ms"
+ }
+ ]
+}
diff --git a/test_result/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.log b/test_result/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.log
new file mode 100644
index 00000000..e69de29b
diff --git a/test_result/deeplearning/dl-op-matmul-transpose-b-benchmark.json b/test_result/deeplearning/dl-op-matmul-transpose-b-benchmark.json
new file mode 100644
index 00000000..caa25dfa
--- /dev/null
+++ b/test_result/deeplearning/dl-op-matmul-transpose-b-benchmark.json
@@ -0,0 +1,96 @@
+{
+ "context": {
+ "date": "2025-09-07T12:46:24+00:00",
+ "host_name": "4ed4bacfe45d",
+ "executable": "./dl-op-matmul-transpose-b-benchmark",
+ "num_cpus": 24,
+ "mhz_per_cpu": 5100,
+ "cpu_scaling_enabled": true,
+ "caches": [
+ {
+ "type": "Data",
+ "level": 1,
+ "size": 49152,
+ "num_sharing": 2
+ },
+ {
+ "type": "Instruction",
+ "level": 1,
+ "size": 32768,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 2,
+ "size": 1310720,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 3,
+ "size": 31457280,
+ "num_sharing": 24
+ }
+ ],
+ "load_avg": [4.61377,3.68164,5.14453],
+ "library_build_type": "release"
+ },
+ "benchmarks": [
+ {
+ "name": "DL_OPS_MATMUL_TRANSPOSE_B/scalar_O0/iterations:5",
+ "family_index": 0,
+ "per_family_instance_index": 0,
+ "run_name": "DL_OPS_MATMUL_TRANSPOSE_B/scalar_O0/iterations:5",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 5,
+ "real_time": 1.0958168849349022e+03,
+ "cpu_time": 1.0942407130000001e+03,
+ "time_unit": "ms"
+ },
+ {
+ "name": "DL_OPS_MATMUL_TRANSPOSE_B/scalar_O3/iterations:5",
+ "family_index": 1,
+ "per_family_instance_index": 0,
+ "run_name": "DL_OPS_MATMUL_TRANSPOSE_B/scalar_O3/iterations:5",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 5,
+ "real_time": 2.9605090841650963e+02,
+ "cpu_time": 2.9603718579999986e+02,
+ "time_unit": "ms"
+ },
+ {
+ "name": "DL_OPS_MATMUL_TRANSPOSE_B/scalar_O3_omp/iterations:5",
+ "family_index": 2,
+ "per_family_instance_index": 0,
+ "run_name": "DL_OPS_MATMUL_TRANSPOSE_B/scalar_O3_omp/iterations:5",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 5,
+ "real_time": 3.6250606924295425e+01,
+ "cpu_time": 2.4062124000000118e+01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "DL_OPS_MATMUL_TRANSPOSE_B/vec/iterations:5",
+ "family_index": 3,
+ "per_family_instance_index": 0,
+ "run_name": "DL_OPS_MATMUL_TRANSPOSE_B/vec/iterations:5",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 5,
+ "real_time": 9.5354539155960083e+01,
+ "cpu_time": 9.5345416999999870e+01,
+ "time_unit": "ms"
+ }
+ ]
+}
diff --git a/test_result/deeplearning/dl-op-matmul-transpose-b-benchmark.log b/test_result/deeplearning/dl-op-matmul-transpose-b-benchmark.log
new file mode 100644
index 00000000..98b32f04
--- /dev/null
+++ b/test_result/deeplearning/dl-op-matmul-transpose-b-benchmark.log
@@ -0,0 +1,21 @@
+2025-09-07T12:46:24+00:00
+Running ./dl-op-matmul-transpose-b-benchmark
+Run on (24 X 5100 MHz CPU s)
+CPU Caches:
+ L1 Data 48 KiB (x12)
+ L1 Instruction 32 KiB (x12)
+ L2 Unified 1280 KiB (x12)
+ L3 Unified 30720 KiB (x1)
+Load Average: 4.61, 3.68, 5.14
+***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
+-----------------------------------------------------------------------------------------------
+Benchmark Time CPU Iterations
+-----------------------------------------------------------------------------------------------
+DL_OPS_MATMUL_TRANSPOSE_B/scalar_O0/iterations:5 1096 ms 1094 ms 5
+DL_OPS_MATMUL_TRANSPOSE_B/scalar_O3/iterations:5 296 ms 296 ms 5
+DL_OPS_MATMUL_TRANSPOSE_B/scalar_O3_omp/iterations:5 36.3 ms 24.1 ms 5
+DL_OPS_MATMUL_TRANSPOSE_B/vec/iterations:5 95.4 ms 95.3 ms 5
+[34m---------- Verification ----------[0m
+scalar_O3 [32mPASS[0m
+scalar_O3_omp [32mPASS[0m
+vec [32mPASS[0m
diff --git a/test_result/deeplearning/dl-op-tosa-transpose-benchmark.json b/test_result/deeplearning/dl-op-tosa-transpose-benchmark.json
new file mode 100644
index 00000000..829e775c
--- /dev/null
+++ b/test_result/deeplearning/dl-op-tosa-transpose-benchmark.json
@@ -0,0 +1,68 @@
+{
+ "context": {
+ "date": "2025-09-07T12:46:23+00:00",
+ "host_name": "4ed4bacfe45d",
+ "executable": "./dl-op-tosa-transpose-benchmark",
+ "num_cpus": 24,
+ "mhz_per_cpu": 5100,
+ "cpu_scaling_enabled": true,
+ "caches": [
+ {
+ "type": "Data",
+ "level": 1,
+ "size": 49152,
+ "num_sharing": 2
+ },
+ {
+ "type": "Instruction",
+ "level": 1,
+ "size": 32768,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 2,
+ "size": 1310720,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 3,
+ "size": 31457280,
+ "num_sharing": 24
+ }
+ ],
+ "load_avg": [2.49072,3.25391,5.01562],
+ "library_build_type": "release"
+ },
+ "benchmarks": [
+ {
+ "name": "DL_OPS_TRANSPOSE_2D/scalar_O0/iterations:5",
+ "family_index": 0,
+ "per_family_instance_index": 0,
+ "run_name": "DL_OPS_TRANSPOSE_2D/scalar_O0/iterations:5",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 5,
+ "real_time": 4.1188374906778336e+01,
+ "cpu_time": 3.0001973999999997e+01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "DL_OPS_TRANSPOSE_2D/scalar_O3/iterations:5",
+ "family_index": 1,
+ "per_family_instance_index": 0,
+ "run_name": "DL_OPS_TRANSPOSE_2D/scalar_O3/iterations:5",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 5,
+ "real_time": 2.9296264052391052e+01,
+ "cpu_time": 2.4695980400000003e+01,
+ "time_unit": "ms"
+ }
+ ]
+}
diff --git a/test_result/deeplearning/dl-op-tosa-transpose-benchmark.log b/test_result/deeplearning/dl-op-tosa-transpose-benchmark.log
new file mode 100644
index 00000000..4b119245
--- /dev/null
+++ b/test_result/deeplearning/dl-op-tosa-transpose-benchmark.log
@@ -0,0 +1,17 @@
+2025-09-07T12:46:23+00:00
+Running ./dl-op-tosa-transpose-benchmark
+Run on (24 X 5100 MHz CPU s)
+CPU Caches:
+ L1 Data 48 KiB (x12)
+ L1 Instruction 32 KiB (x12)
+ L2 Unified 1280 KiB (x12)
+ L3 Unified 30720 KiB (x1)
+Load Average: 2.49, 3.25, 5.02
+***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
+-------------------------------------------------------------------------------------
+Benchmark Time CPU Iterations
+-------------------------------------------------------------------------------------
+DL_OPS_TRANSPOSE_2D/scalar_O0/iterations:5 41.2 ms 30.0 ms 5
+DL_OPS_TRANSPOSE_2D/scalar_O3/iterations:5 29.3 ms 24.7 ms 5
+[34m---------- Verification ----------[0m
+scalar_O3 [32mPASS[0m
diff --git a/test_result/deeplearning/run_results_summary.log b/test_result/deeplearning/run_results_summary.log
new file mode 100644
index 00000000..309ac991
--- /dev/null
+++ b/test_result/deeplearning/run_results_summary.log
@@ -0,0 +1,49 @@
+[Missing] Executable not found for 'dl-model-tinyllama-benchmark'
+[Missing] Executable not found for 'dl-model-mobilenetv3-benchmark'
+[Failed] Run of 'dl-model-lenet-benchmark'
+ ↳ stdout/stderr → dl-model-lenet-benchmark.log (may contain errors)
+[Missing] Executable not found for 'dl-model-bert-benchmark'
+[Missing] Executable not found for 'dl-model-whisper-benchmark'
+[Missing] Executable not found for 'dl-model-resnet18-benchmark'
+[Failed] Run of 'dl-layer-ffn-benchmark'
+ ↳ stdout/stderr → dl-layer-ffn-benchmark.log (may contain errors)
+[Failed] Run of 'dl-layer-selfattention-benchmark'
+ ↳ stdout/stderr → dl-layer-selfattention-benchmark.log (may contain errors)
+[Failed] Run of 'dl-layer-rmsnorm-benchmark'
+ ↳ stdout/stderr → dl-layer-rmsnorm-benchmark.log (may contain errors)
+[Missing] Executable not found for 'dl-op-linalg-matmul-benchmark'
+[Failed] Run of 'dl-op-linalg-conv2d-nchw-fchw-benchmark'
+ ↳ stdout/stderr → dl-op-linalg-conv2d-nchw-fchw-benchmark.log (may contain errors)
+[Failed] Run of 'dl-op-linalg-conv2d-nhwc-hwcf-benchmark'
+ ↳ stdout/stderr → dl-op-linalg-conv2d-nhwc-hwcf-benchmark.log (may contain errors)
+[Failed] Run of 'dl-op-linalg-conv2d-nhwc-fhwc-benchmark'
+ ↳ stdout/stderr → dl-op-linalg-conv2d-nhwc-fhwc-benchmark.log (may contain errors)
+[Failed] Run of 'dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark'
+ ↳ stdout/stderr → dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.log (may contain errors)
+[Failed] Run of 'dl-op-linalg-pooling-nhwc-sum-benchmark'
+ ↳ stdout/stderr → dl-op-linalg-pooling-nhwc-sum-benchmark.log (may contain errors)
+[Missing] Executable not found for 'dl-op-linalg-batch-matmul-benchmark'
+[Failed] Run of 'dl-op-linalg-arithaddf-benchmark'
+ ↳ stdout/stderr → dl-op-linalg-arithaddf-benchmark.log (may contain errors)
+[Failed] Run of 'dl-op-linalg-arithdivf-benchmark'
+ ↳ stdout/stderr → dl-op-linalg-arithdivf-benchmark.log (may contain errors)
+[Failed] Run of 'dl-op-linalg-arithmulf-benchmark'
+ ↳ stdout/stderr → dl-op-linalg-arithmulf-benchmark.log (may contain errors)
+[Failed] Run of 'dl-op-linalg-arithnegf-benchmark'
+ ↳ stdout/stderr → dl-op-linalg-arithnegf-benchmark.log (may contain errors)
+[Failed] Run of 'dl-op-linalg-arithsubf-benchmark'
+ ↳ stdout/stderr → dl-op-linalg-arithsubf-benchmark.log (may contain errors)
+[Failed] Run of 'dl-op-linalg-mathfpow-benchmark'
+ ↳ stdout/stderr → dl-op-linalg-mathfpow-benchmark.log (may contain errors)
+[Failed] Run of 'dl-op-linalg-mathrsqrt-benchmark'
+ ↳ stdout/stderr → dl-op-linalg-mathrsqrt-benchmark.log (may contain errors)
+[Failed] Run of 'dl-op-linalg-mathexp-benchmark'
+ ↳ stdout/stderr → dl-op-linalg-mathexp-benchmark.log (may contain errors)
+[Failed] Run of 'dl-op-linalg-reduceaddf-benchmark'
+ ↳ stdout/stderr → dl-op-linalg-reduceaddf-benchmark.log (may contain errors)
+[Failed] Run of 'dl-op-linalg-reducemaxf-benchmark'
+ ↳ stdout/stderr → dl-op-linalg-reducemaxf-benchmark.log (may contain errors)
+[Failed] Run of 'dl-op-linalg-softmax-exp-sum-div-benchmark'
+ ↳ stdout/stderr → dl-op-linalg-softmax-exp-sum-div-benchmark.log (may contain errors)
+[Missing] Executable not found for 'dl-op-tosa-transpose-benchmark'
+[Missing] Executable not found for 'dl-op-matmul-transpose-b-benchmark'
diff --git a/test_result/geminiprocessing/build.log b/test_result/geminiprocessing/build.log
new file mode 100644
index 00000000..aa1b4a29
--- /dev/null
+++ b/test_result/geminiprocessing/build.log
@@ -0,0 +1,655 @@
+[1/21] Creating directories for 'project_googlebenchmark'
+[2/21] Building C object benchmarks/Gemmini/Ops/MatMulOp/CMakeFiles/ExoMatMul.dir/ExoMatmul.c.o
+FAILED: benchmarks/Gemmini/Ops/MatMulOp/CMakeFiles/ExoMatMul.dir/ExoMatmul.c.o
+riscv64-unknown-linux-gnu-gcc -I/home/buddy-complier-workspace/buddy-mlir/build/cmake/../../frontend/Interfaces -I/home/buddy-complier-workspace/buddy-mlir/build/cmake/../../thirdparty/include -I/home/buddy-complier-workspace/buddy-benchmark/benchmarks -I/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include -I/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/.. -I/home/xychen/buddy-mlir/frontend/Interfaces -O3 -DNDEBUG -MD -MT benchmarks/Gemmini/Ops/MatMulOp/CMakeFiles/ExoMatMul.dir/ExoMatmul.c.o -MF benchmarks/Gemmini/Ops/MatMulOp/CMakeFiles/ExoMatMul.dir/ExoMatmul.c.o.d -o benchmarks/Gemmini/Ops/MatMulOp/CMakeFiles/ExoMatMul.dir/ExoMatmul.c.o -c /home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c: In function '_exo_matmul_4':
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:28:47: error: macro "gemmini_extended_config_ex" requires 7 arguments, but only 6 given
+ 28 | gemmini_extended_config_ex(WS, 0, 0, 1, 0, 0);
+ | ^
+In file included from /home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:23:
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:251: note: macro "gemmini_extended_config_ex" defined here
+ 251 | #define gemmini_extended_config_ex(dataflow, sys_act, sys_shift, relu6_shift, A_stride, A_transpose, B_transpose) \
+ |
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:28:3: error: 'gemmini_extended_config_ex' undeclared (first use in this function)
+ 28 | gemmini_extended_config_ex(WS, 0, 0, 1, 0, 0);
+ | ^~~~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:28:3: note: each undeclared identifier is reported only once for each function it appears in
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:35:18: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast]
+ 35 | int32_t *res = (int32_t*) ((uint32_t)gemm_acc_malloc (16 * 16 * 4 * 4 * sizeof(int32_t)));
+ | ^
+In file included from /home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:20,
+ from /home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:23:
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:66:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
+ 66 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16));
+ | ^
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
+ 152 | : "r"(rs1), "r"(rs2)); \
+ | ^~~
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
+ 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:66:9: note: in expansion of macro 'gemmini_extended_preload'
+ 66 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16));
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:66:119: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
+ 66 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16));
+ | ^
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
+ 152 | : "r"(rs1), "r"(rs2)); \
+ | ^~~
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
+ 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:66:9: note: in expansion of macro 'gemmini_extended_preload'
+ 66 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16));
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:67:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
+ 67 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024))/16))), ~((uint32_t)0), (16), (16), 16, 16);
+ | ^
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
+ 152 | : "r"(rs1), "r"(rs2)); \
+ | ^~~
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
+ 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED)
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:67:9: note: in expansion of macro 'gemmini_extended_compute_preloaded'
+ 67 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024))/16))), ~((uint32_t)0), (16), (16), 16, 16);
+ | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:68:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
+ 68 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16));
+ | ^
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
+ 152 | : "r"(rs1), "r"(rs2)); \
+ | ^~~
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
+ 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:68:9: note: in expansion of macro 'gemmini_extended_preload'
+ 68 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16));
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:68:125: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
+ 68 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16));
+ | ^
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
+ 152 | : "r"(rs1), "r"(rs2)); \
+ | ^~~
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
+ 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:68:9: note: in expansion of macro 'gemmini_extended_preload'
+ 68 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16));
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:69:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
+ 69 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024))/16))), ~((uint32_t)0), (16), (16), 16, 16);
+ | ^
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
+ 152 | : "r"(rs1), "r"(rs2)); \
+ | ^~~
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
+ 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED)
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:69:9: note: in expansion of macro 'gemmini_extended_compute_preloaded'
+ 69 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024))/16))), ~((uint32_t)0), (16), (16), 16, 16);
+ | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:70:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
+ 70 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
+ | ^
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
+ 152 | : "r"(rs1), "r"(rs2)); \
+ | ^~~
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
+ 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:70:9: note: in expansion of macro 'gemmini_extended_preload'
+ 70 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:70:133: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
+ 70 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
+ | ^
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
+ 152 | : "r"(rs1), "r"(rs2)); \
+ | ^~~
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
+ 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:70:9: note: in expansion of macro 'gemmini_extended_preload'
+ 70 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:71:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
+ 71 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024))/16))), ~((uint32_t)0), (16), (16), 16, 16);
+ | ^
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
+ 152 | : "r"(rs1), "r"(rs2)); \
+ | ^~~
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
+ 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED)
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:71:9: note: in expansion of macro 'gemmini_extended_compute_preloaded'
+ 71 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024))/16))), ~((uint32_t)0), (16), (16), 16, 16);
+ | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:72:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
+ 72 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
+ | ^
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
+ 152 | : "r"(rs1), "r"(rs2)); \
+ | ^~~
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
+ 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:72:9: note: in expansion of macro 'gemmini_extended_preload'
+ 72 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:72:133: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
+ 72 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
+ | ^
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
+ 152 | : "r"(rs1), "r"(rs2)); \
+ | ^~~
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
+ 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:72:9: note: in expansion of macro 'gemmini_extended_preload'
+ 72 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:73:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
+ 73 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024))/16))), ~((uint32_t)0), (16), (16), 16, 16);
+ | ^
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
+ 152 | : "r"(rs1), "r"(rs2)); \
+ | ^~~
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
+ 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED)
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:73:9: note: in expansion of macro 'gemmini_extended_compute_preloaded'
+ 73 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024))/16))), ~((uint32_t)0), (16), (16), 16, 16);
+ | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:74:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
+ 74 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16));
+ | ^
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
+ 152 | : "r"(rs1), "r"(rs2)); \
+ | ^~~
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
+ 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:74:9: note: in expansion of macro 'gemmini_extended_preload'
+ 74 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16));
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:74:126: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
+ 74 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16));
+ | ^
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
+ 152 | : "r"(rs1), "r"(rs2)); \
+ | ^~~
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
+ 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:74:9: note: in expansion of macro 'gemmini_extended_preload'
+ 74 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16));
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:75:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
+ 75 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + 256)/16))), ~((uint32_t)0), (16), (16), 16, 16);
+ | ^
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
+ 152 | : "r"(rs1), "r"(rs2)); \
+ | ^~~
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
+ 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED)
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:75:9: note: in expansion of macro 'gemmini_extended_compute_preloaded'
+ 75 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + 256)/16))), ~((uint32_t)0), (16), (16), 16, 16);
+ | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:76:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
+ 76 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16));
+ | ^
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
+ 152 | : "r"(rs1), "r"(rs2)); \
+ | ^~~
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
+ 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:76:9: note: in expansion of macro 'gemmini_extended_preload'
+ 76 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16));
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:76:132: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
+ 76 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16));
+ | ^
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
+ 152 | : "r"(rs1), "r"(rs2)); \
+ | ^~~
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
+ 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:76:9: note: in expansion of macro 'gemmini_extended_preload'
+ 76 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16));
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:77:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
+ 77 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + 256)/16))), ~((uint32_t)0), (16), (16), 16, 16);
+ | ^
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
+ 152 | : "r"(rs1), "r"(rs2)); \
+ | ^~~
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
+ 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED)
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:77:9: note: in expansion of macro 'gemmini_extended_compute_preloaded'
+ 77 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + 256)/16))), ~((uint32_t)0), (16), (16), 16, 16);
+ | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:78:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
+ 78 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
+ | ^
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
+ 152 | : "r"(rs1), "r"(rs2)); \
+ | ^~~
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
+ 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:78:9: note: in expansion of macro 'gemmini_extended_preload'
+ 78 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:78:140: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
+ 78 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
+ | ^
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
+ 152 | : "r"(rs1), "r"(rs2)); \
+ | ^~~
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
+ 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:78:9: note: in expansion of macro 'gemmini_extended_preload'
+ 78 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:79:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
+ 79 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + 256)/16))), ~((uint32_t)0), (16), (16), 16, 16);
+ | ^
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
+ 152 | : "r"(rs1), "r"(rs2)); \
+ | ^~~
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
+ 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED)
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:79:9: note: in expansion of macro 'gemmini_extended_compute_preloaded'
+ 79 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + 256)/16))), ~((uint32_t)0), (16), (16), 16, 16);
+ | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:80:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
+ 80 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
+ | ^
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
+ 152 | : "r"(rs1), "r"(rs2)); \
+ | ^~~
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
+ 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:80:9: note: in expansion of macro 'gemmini_extended_preload'
+ 80 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:80:140: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
+ 80 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
+ | ^
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
+ 152 | : "r"(rs1), "r"(rs2)); \
+ | ^~~
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
+ 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:80:9: note: in expansion of macro 'gemmini_extended_preload'
+ 80 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:81:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
+ 81 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + 256)/16))), ~((uint32_t)0), (16), (16), 16, 16);
+ | ^
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
+ 152 | : "r"(rs1), "r"(rs2)); \
+ | ^~~
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
+ 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED)
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:81:9: note: in expansion of macro 'gemmini_extended_compute_preloaded'
+ 81 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + 256)/16))), ~((uint32_t)0), (16), (16), 16, 16);
+ | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:82:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
+ 82 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16));
+ | ^
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
+ 152 | : "r"(rs1), "r"(rs2)); \
+ | ^~~
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
+ 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:82:9: note: in expansion of macro 'gemmini_extended_preload'
+ 82 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16));
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:82:134: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
+ 82 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16));
+ | ^
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
+ 152 | : "r"(rs1), "r"(rs2)); \
+ | ^~~
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
+ 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:82:9: note: in expansion of macro 'gemmini_extended_preload'
+ 82 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16));
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:83:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
+ 83 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (2) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16);
+ | ^
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
+ 152 | : "r"(rs1), "r"(rs2)); \
+ | ^~~
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
+ 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED)
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:83:9: note: in expansion of macro 'gemmini_extended_compute_preloaded'
+ 83 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (2) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16);
+ | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:84:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
+ 84 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16));
+ | ^
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
+ 152 | : "r"(rs1), "r"(rs2)); \
+ | ^~~
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
+ 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:84:9: note: in expansion of macro 'gemmini_extended_preload'
+ 84 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16));
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:84:140: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
+ 84 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16));
+ | ^
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
+ 152 | : "r"(rs1), "r"(rs2)); \
+ | ^~~
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
+ 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:84:9: note: in expansion of macro 'gemmini_extended_preload'
+ 84 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16));
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:85:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
+ 85 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (2) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16);
+ | ^
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
+ 152 | : "r"(rs1), "r"(rs2)); \
+ | ^~~
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
+ 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED)
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:85:9: note: in expansion of macro 'gemmini_extended_compute_preloaded'
+ 85 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (2) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16);
+ | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:86:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
+ 86 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
+ | ^
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
+ 152 | : "r"(rs1), "r"(rs2)); \
+ | ^~~
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
+ 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:86:9: note: in expansion of macro 'gemmini_extended_preload'
+ 86 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:86:148: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
+ 86 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
+ | ^
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
+ 152 | : "r"(rs1), "r"(rs2)); \
+ | ^~~
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
+ 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:86:9: note: in expansion of macro 'gemmini_extended_preload'
+ 86 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:87:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
+ 87 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (2) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16);
+ | ^
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
+ 152 | : "r"(rs1), "r"(rs2)); \
+ | ^~~
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
+ 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED)
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:87:9: note: in expansion of macro 'gemmini_extended_compute_preloaded'
+ 87 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (2) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16);
+ | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:88:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
+ 88 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
+ | ^
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
+ 152 | : "r"(rs1), "r"(rs2)); \
+ | ^~~
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
+ 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:88:9: note: in expansion of macro 'gemmini_extended_preload'
+ 88 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:88:148: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
+ 88 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
+ | ^
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
+ 152 | : "r"(rs1), "r"(rs2)); \
+ | ^~~
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
+ 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:88:9: note: in expansion of macro 'gemmini_extended_preload'
+ 88 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:89:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
+ 89 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (2) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16);
+ | ^
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
+ 152 | : "r"(rs1), "r"(rs2)); \
+ | ^~~
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
+ 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED)
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:89:9: note: in expansion of macro 'gemmini_extended_compute_preloaded'
+ 89 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (2) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16);
+ | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:90:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
+ 90 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16));
+ | ^
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
+ 152 | : "r"(rs1), "r"(rs2)); \
+ | ^~~
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
+ 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:90:9: note: in expansion of macro 'gemmini_extended_preload'
+ 90 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16));
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:90:134: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
+ 90 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16));
+ | ^
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
+ 152 | : "r"(rs1), "r"(rs2)); \
+ | ^~~
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
+ 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:90:9: note: in expansion of macro 'gemmini_extended_preload'
+ 90 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16));
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:91:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
+ 91 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (3) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16);
+ | ^
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
+ 152 | : "r"(rs1), "r"(rs2)); \
+ | ^~~
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
+ 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED)
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:91:9: note: in expansion of macro 'gemmini_extended_compute_preloaded'
+ 91 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (3) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16);
+ | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:92:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
+ 92 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16));
+ | ^
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
+ 152 | : "r"(rs1), "r"(rs2)); \
+ | ^~~
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
+ 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:92:9: note: in expansion of macro 'gemmini_extended_preload'
+ 92 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16));
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:92:140: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
+ 92 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16));
+ | ^
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
+ 152 | : "r"(rs1), "r"(rs2)); \
+ | ^~~
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
+ 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:92:9: note: in expansion of macro 'gemmini_extended_preload'
+ 92 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16));
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:93:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
+ 93 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (3) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16);
+ | ^
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
+ 152 | : "r"(rs1), "r"(rs2)); \
+ | ^~~
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
+ 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED)
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:93:9: note: in expansion of macro 'gemmini_extended_compute_preloaded'
+ 93 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (3) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16);
+ | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:94:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
+ 94 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
+ | ^
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
+ 152 | : "r"(rs1), "r"(rs2)); \
+ | ^~~
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
+ 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:94:9: note: in expansion of macro 'gemmini_extended_preload'
+ 94 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:94:148: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
+ 94 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
+ | ^
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
+ 152 | : "r"(rs1), "r"(rs2)); \
+ | ^~~
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
+ 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:94:9: note: in expansion of macro 'gemmini_extended_preload'
+ 94 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:95:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
+ 95 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (3) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16);
+ | ^
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
+ 152 | : "r"(rs1), "r"(rs2)); \
+ | ^~~
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
+ 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED)
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:95:9: note: in expansion of macro 'gemmini_extended_compute_preloaded'
+ 95 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (3) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16);
+ | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:96:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
+ 96 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
+ | ^
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
+ 152 | : "r"(rs1), "r"(rs2)); \
+ | ^~~
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
+ 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:96:9: note: in expansion of macro 'gemmini_extended_preload'
+ 96 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:96:148: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
+ 96 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
+ | ^
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
+ 152 | : "r"(rs1), "r"(rs2)); \
+ | ^~~
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
+ 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:96:9: note: in expansion of macro 'gemmini_extended_preload'
+ 96 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:97:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
+ 97 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (3) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16);
+ | ^
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
+ 152 | : "r"(rs1), "r"(rs2)); \
+ | ^~~
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
+ 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED)
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:97:9: note: in expansion of macro 'gemmini_extended_compute_preloaded'
+ 97 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (3) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16);
+ | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:98:89: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
+ 98 | gemmini_extended_mvout( ((uint64_t) &C[(16 * i + 3136 * io) * (256) + 64 * j]), (uint32_t) &*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16)), (16), (16) );
+ | ^
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
+ 152 | : "r"(rs1), "r"(rs2)); \
+ | ^~~
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:212:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
+ 212 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, dram_addr, ((uint64_t)(rows) << (ADDR_LEN + 16)) | ((uint64_t)(cols) << ADDR_LEN) | (uint64_t)(spad_addr), k_MVOUT)
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:98:9: note: in expansion of macro 'gemmini_extended_mvout'
+ 98 | gemmini_extended_mvout( ((uint64_t) &C[(16 * i + 3136 * io) * (256) + 64 * j]), (uint32_t) &*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16)), (16), (16) );
+ | ^~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:99:94: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
+ 99 | gemmini_extended_mvout( ((uint64_t) &C[(16 * i + 3136 * io) * (256) + 16 + 64 * j]), (uint32_t) &*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16)), (16), (16) );
+ | ^
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
+ 152 | : "r"(rs1), "r"(rs2)); \
+ | ^~~
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:212:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
+ 212 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, dram_addr, ((uint64_t)(rows) << (ADDR_LEN + 16)) | ((uint64_t)(cols) << ADDR_LEN) | (uint64_t)(spad_addr), k_MVOUT)
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:99:9: note: in expansion of macro 'gemmini_extended_mvout'
+ 99 | gemmini_extended_mvout( ((uint64_t) &C[(16 * i + 3136 * io) * (256) + 16 + 64 * j]), (uint32_t) &*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16)), (16), (16) );
+ | ^~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:100:94: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
+ 100 | gemmini_extended_mvout( ((uint64_t) &C[(16 * i + 3136 * io) * (256) + 32 + 64 * j]), (uint32_t) &*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16)), (16), (16) );
+ | ^
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
+ 152 | : "r"(rs1), "r"(rs2)); \
+ | ^~~
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:212:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
+ 212 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, dram_addr, ((uint64_t)(rows) << (ADDR_LEN + 16)) | ((uint64_t)(cols) << ADDR_LEN) | (uint64_t)(spad_addr), k_MVOUT)
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:100:9: note: in expansion of macro 'gemmini_extended_mvout'
+ 100 | gemmini_extended_mvout( ((uint64_t) &C[(16 * i + 3136 * io) * (256) + 32 + 64 * j]), (uint32_t) &*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16)), (16), (16) );
+ | ^~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:101:94: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
+ 101 | gemmini_extended_mvout( ((uint64_t) &C[(16 * i + 3136 * io) * (256) + 48 + 64 * j]), (uint32_t) &*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16)), (16), (16) );
+ | ^
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
+ 152 | : "r"(rs1), "r"(rs2)); \
+ | ^~~
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:212:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
+ 212 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, dram_addr, ((uint64_t)(rows) << (ADDR_LEN + 16)) | ((uint64_t)(cols) << ADDR_LEN) | (uint64_t)(spad_addr), k_MVOUT)
+ | ^~~~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:101:9: note: in expansion of macro 'gemmini_extended_mvout'
+ 101 | gemmini_extended_mvout( ((uint64_t) &C[(16 * i + 3136 * io) * (256) + 48 + 64 * j]), (uint32_t) &*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16)), (16), (16) );
+ | ^~~~~~~~~~~~~~~~~~~~~~
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:105:17: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
+ 105 | gemm_acc_free((uint32_t)(res));
+ | ^
+[3/21] Building CXX object benchmarks/Gemmini/ResNet-101/CMakeFiles/CRunnerUtils.dir/CRunnerUtils.cpp.o
+[4/21] Generating buddy_matmul.o
+[5/21] Performing download step (git clone) for 'project_googlebenchmark'
+Cloning into 'project_googlebenchmark'...
+HEAD is now at f91b6b4 bump version to 1.6 in preparation for release
+[6/21] Generating resnet-101.o
+ninja: build stopped: subcommand failed.
diff --git a/test_result/geminiprocessing/cmake_configure.log b/test_result/geminiprocessing/cmake_configure.log
new file mode 100644
index 00000000..a3a42f37
--- /dev/null
+++ b/test_result/geminiprocessing/cmake_configure.log
@@ -0,0 +1,37 @@
+-- The CXX compiler identification is GNU 9.2.0
+-- The C compiler identification is GNU 9.2.0
+-- Detecting CXX compiler ABI info
+-- Detecting CXX compiler ABI info - done
+-- Check for working CXX compiler: /home/buddy-complier-workspace/chipyard/.conda-env/esp-tools/bin/riscv64-unknown-linux-gnu-g++ - skipped
+-- Detecting CXX compile features
+-- Detecting CXX compile features - done
+-- Detecting C compiler ABI info
+-- Detecting C compiler ABI info - done
+-- Check for working C compiler: /home/buddy-complier-workspace/chipyard/.conda-env/esp-tools/bin/riscv64-unknown-linux-gnu-gcc - skipped
+-- Detecting C compile features
+-- Detecting C compile features - done
+-- Configuring Target Architecture: avx512f
+-- Configuring Target Triple: x86_64-unknown-linux-gnu
+-- Configuring benchmarks: google
+-- Performing Test CMAKE_HAVE_LIBC_PTHREAD
+-- Performing Test CMAKE_HAVE_LIBC_PTHREAD - Failed
+-- Looking for pthread_create in pthreads
+-- Looking for pthread_create in pthreads - not found
+-- Looking for pthread_create in pthread
+-- Looking for pthread_create in pthread - found
+-- Found Threads: TRUE
+-- Performing Test HAVE_SSE
+-- Performing Test HAVE_SSE - Failed
+-- SSE support - no
+-- Performing Test HAVE_AVX2
+-- Performing Test HAVE_AVX2 - Failed
+-- AVX2 support - no
+-- Performing Test HAVE_AVX512
+-- Performing Test HAVE_AVX512 - Failed
+-- AVX512 support - no
+-- Performing Test HAVE_NEON
+-- Performing Test HAVE_NEON - Failed
+-- Arm Neon support - no
+-- Configuring done
+-- Generating done
+-- Build files have been written to: /home/buddy-complier-workspace/buddy-benchmark/build
diff --git a/test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json
new file mode 100644
index 00000000..937dc3e3
--- /dev/null
+++ b/test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json
@@ -0,0 +1,348 @@
+{
+ "context": {
+ "date": "2025-09-07T14:29:02+00:00",
+ "host_name": "4ed4bacfe45d",
+ "executable": "./bin/image-processing-benchmark",
+ "num_cpus": 24,
+ "mhz_per_cpu": 5100,
+ "cpu_scaling_enabled": true,
+ "caches": [
+ {
+ "type": "Data",
+ "level": 1,
+ "size": 49152,
+ "num_sharing": 2
+ },
+ {
+ "type": "Instruction",
+ "level": 1,
+ "size": 32768,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 2,
+ "size": 1310720,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 3,
+ "size": 31457280,
+ "num_sharing": 24
+ }
+ ],
+ "load_avg": [3.04053,3.38672,4.34863],
+ "library_build_type": "release"
+ },
+ "benchmarks": [
+ {
+ "name": "Eigen_Convolve2D/1",
+ "family_index": 0,
+ "per_family_instance_index": 0,
+ "run_name": "Eigen_Convolve2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 140,
+ "real_time": 4.9702270754746030e+00,
+ "cpu_time": 4.9699425000000002e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "MLIR_Conv2D/1",
+ "family_index": 1,
+ "per_family_instance_index": 0,
+ "run_name": "MLIR_Conv2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 92,
+ "real_time": 7.5326938101130985e+00,
+ "cpu_time": 7.5326059673913059e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Conv2D/1",
+ "family_index": 2,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Conv2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1614,
+ "real_time": 4.3082923906517145e-01,
+ "cpu_time": 4.3081728562577470e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Corr2D_Constant_Padding/1",
+ "family_index": 3,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Corr2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 624,
+ "real_time": 1.1141470943888028e+00,
+ "cpu_time": 1.1141273092948720e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Filter2D_Constant_Padding/1",
+ "family_index": 4,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Filter2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 359,
+ "real_time": 1.9465408347609316e+00,
+ "cpu_time": 1.9465103370473527e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "family_index": 5,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4753,
+ "real_time": 1.4783761252635608e-01,
+ "cpu_time": 1.4782938796549550e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Resize2D_Bilinear_Interpolation/1",
+ "family_index": 6,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2588,
+ "real_time": 2.7286772899778772e-01,
+ "cpu_time": 2.7285900193199364e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "family_index": 7,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 101129,
+ "real_time": 6.9313498568830253e-03,
+ "cpu_time": 6.9310267381265502e-03,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Resize2D_Bilinear_Interpolation/1",
+ "family_index": 8,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Resize2D_Bilinear_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 47932,
+ "real_time": 1.4616750139684954e-02,
+ "cpu_time": 1.4616182466828002e-02,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Erosion2D_Constant_Padding/1",
+ "family_index": 9,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Erosion2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1876,
+ "real_time": 3.1442655476807024e-01,
+ "cpu_time": 3.1440550479744134e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Dilation2D_Constant_Padding/1",
+ "family_index": 10,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Dilation2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2598,
+ "real_time": 3.2269134486611756e-01,
+ "cpu_time": 3.2267296497305642e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Opening2D_Constant_Padding/1",
+ "family_index": 11,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Opening2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1000,
+ "real_time": 5.9975032135844231e-01,
+ "cpu_time": 5.9970746099999950e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Closing2D_Constant_Padding/1",
+ "family_index": 12,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Closing2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1107,
+ "real_time": 5.4013467281922201e-01,
+ "cpu_time": 5.4010315356820238e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_TopHat2D_Constant_Padding/1",
+ "family_index": 13,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_TopHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 670,
+ "real_time": 9.5614417275386065e-01,
+ "cpu_time": 9.5608898805969977e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_BottomHat2D_Constant_Padding/1",
+ "family_index": 14,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_BottomHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 701,
+ "real_time": 9.5555469521272196e-01,
+ "cpu_time": 9.5552639372325310e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Erode2D_Constant_Padding/1",
+ "family_index": 15,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Erode2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4869,
+ "real_time": 1.4373984491587616e-01,
+ "cpu_time": 1.4373398007804475e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Opening2D_Constant_Padding/1",
+ "family_index": 16,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Opening2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 3020,
+ "real_time": 2.3255034763094606e-01,
+ "cpu_time": 2.3254246953642435e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Closing2D_Constant_Padding/1",
+ "family_index": 17,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Closing2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 3017,
+ "real_time": 2.3421093841281837e-01,
+ "cpu_time": 2.3420040371229658e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_TopHat2D_Constant_Padding/1",
+ "family_index": 18,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_TopHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2570,
+ "real_time": 2.7231803663973680e-01,
+ "cpu_time": 2.7231148404669192e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_BottomHat2D_Constant_Padding/1",
+ "family_index": 19,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_BottomHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2568,
+ "real_time": 2.7264940416051592e-01,
+ "cpu_time": 2.7263315109034342e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_MorphGrad2D_Constant_Padding/1",
+ "family_index": 20,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2637,
+ "real_time": 2.6645213591586836e-01,
+ "cpu_time": 2.6643527948426282e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Dilate2D_Constant_Padding/1",
+ "family_index": 21,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Dilate2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4955,
+ "real_time": 1.4140665997289625e-01,
+ "cpu_time": 1.4139799172553003e-01,
+ "time_unit": "ms"
+ }
+ ]
+}
diff --git a/test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log b/test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log
new file mode 100644
index 00000000..0aab2d9d
--- /dev/null
+++ b/test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log
@@ -0,0 +1,56 @@
+2025-09-07T14:29:02+00:00
+Running ./bin/image-processing-benchmark
+Run on (24 X 5100 MHz CPU s)
+CPU Caches:
+ L1 Data 48 KiB (x12)
+ L1 Instruction 32 KiB (x12)
+ L2 Unified 1280 KiB (x12)
+ L3 Unified 30720 KiB (x1)
+Load Average: 3.04, 3.39, 4.35
+***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
+--------------------------------------------------------------------------------------------
+Benchmark Time CPU Iterations
+--------------------------------------------------------------------------------------------
+Eigen_Convolve2D/1 4.97 ms 4.97 ms 140
+MLIR_Conv2D/1 7.53 ms 7.53 ms 92
+Buddy_Conv2D/1 0.431 ms 0.431 ms 1614
+Buddy_Corr2D_Constant_Padding/1 1.11 ms 1.11 ms 624
+OpenCV_Filter2D_Constant_Padding/1 1.95 ms 1.95 ms 359
+Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.148 ms 0.148 ms 4753
+Buddy_Resize2D_Bilinear_Interpolation/1 0.273 ms 0.273 ms 2588
+OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 101129
+OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47932
+Buddy_Erosion2D_Constant_Padding/1 0.314 ms 0.314 ms 1876
+Buddy_Dilation2D_Constant_Padding/1 0.323 ms 0.323 ms 2598
+Buddy_Opening2D_Constant_Padding/1 0.600 ms 0.600 ms 1000
+Buddy_Closing2D_Constant_Padding/1 0.540 ms 0.540 ms 1107
+Buddy_TopHat2D_Constant_Padding/1 0.956 ms 0.956 ms 670
+Buddy_BottomHat2D_Constant_Padding/1 0.956 ms 0.956 ms 701
+OpenCV_Erode2D_Constant_Padding/1 0.144 ms 0.144 ms 4869
+OpenCV_Opening2D_Constant_Padding/1 0.233 ms 0.233 ms 3020
+OpenCV_Closing2D_Constant_Padding/1 0.234 ms 0.234 ms 3017
+OpenCV_TopHat2D_Constant_Padding/1 0.272 ms 0.272 ms 2570
+OpenCV_BottomHat2D_Constant_Padding/1 0.273 ms 0.273 ms 2568
+OpenCV_MorphGrad2D_Constant_Padding/1 0.266 ms 0.266 ms 2637
+OpenCV_Dilate2D_Constant_Padding/1 0.141 ms 0.141 ms 4955
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
diff --git a/test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json
new file mode 100644
index 00000000..825c9e79
--- /dev/null
+++ b/test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json
@@ -0,0 +1,349 @@
+{
+ "context": {
+ "date": "2025-09-07T14:29:26+00:00",
+ "host_name": "4ed4bacfe45d",
+ "executable": "./bin/image-processing-benchmark",
+ "num_cpus": 24,
+ "mhz_per_cpu": 5100,
+ "cpu_scaling_enabled": true,
+ "caches": [
+ {
+ "type": "Data",
+ "level": 1,
+ "size": 49152,
+ "num_sharing": 2
+ },
+ {
+ "type": "Instruction",
+ "level": 1,
+ "size": 32768,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 2,
+ "size": 1310720,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 3,
+ "size": 31457280,
+ "num_sharing": 24
+ }
+ ],
+ "load_avg": [3.02539,3.35449,4.31201],
+ "library_build_type": "release"
+ },
+ "benchmarks": [
+ {
+ "name": "Eigen_Convolve2D/1",
+ "family_index": 0,
+ "per_family_instance_index": 0,
+ "run_name": "Eigen_Convolve2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 109,
+ "real_time": 4.9761295523665368e+00,
+ "cpu_time": 4.9760039174311919e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "MLIR_Conv2D/1",
+ "family_index": 1,
+ "per_family_instance_index": 0,
+ "run_name": "MLIR_Conv2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 93,
+ "real_time": 7.5348360083436452e+00,
+ "cpu_time": 7.5346214623655925e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Conv2D/1",
+ "family_index": 2,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Conv2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1613,
+ "real_time": 4.3241579896237492e-01,
+ "cpu_time": 4.3240159950402979e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Corr2D_Constant_Padding/1",
+ "family_index": 3,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Corr2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 621,
+ "real_time": 1.1201563460719375e+00,
+ "cpu_time": 1.1201345571658614e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Filter2D_Constant_Padding/1",
+ "family_index": 4,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Filter2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 358,
+ "real_time": 1.9431075591115312e+00,
+ "cpu_time": 1.9430616452513958e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "family_index": 5,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4730,
+ "real_time": 1.4787029561608336e-01,
+ "cpu_time": 1.4785987547568707e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Resize2D_Bilinear_Interpolation/1",
+ "family_index": 6,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2582,
+ "real_time": 2.7276169563521718e-01,
+ "cpu_time": 2.7275317970565449e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "family_index": 7,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 100345,
+ "real_time": 6.9529070208036907e-03,
+ "cpu_time": 6.9522148388061148e-03,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Resize2D_Bilinear_Interpolation/1",
+ "family_index": 8,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Resize2D_Bilinear_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 47937,
+ "real_time": 1.4593899464516223e-02,
+ "cpu_time": 1.4593418570206736e-02,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Erosion2D_Constant_Padding/1",
+ "family_index": 9,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Erosion2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2390,
+ "real_time": 2.9568342203625075e-01,
+ "cpu_time": 2.9566401338912135e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Dilation2D_Constant_Padding/1",
+ "family_index": 10,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Dilation2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2256,
+ "real_time": 2.9021860859918258e-01,
+ "cpu_time": 2.9020803501773085e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Opening2D_Constant_Padding/1",
+ "family_index": 11,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Opening2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 882,
+ "real_time": 6.7151636898923084e-01,
+ "cpu_time": 6.7144004875283347e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Closing2D_Constant_Padding/1",
+ "family_index": 12,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Closing2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1000,
+ "real_time": 6.1067149415612221e-01,
+ "cpu_time": 6.1064459099999979e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_TopHat2D_Constant_Padding/1",
+ "family_index": 13,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_TopHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 460,
+ "real_time": 1.3535791765088621e+00,
+ "cpu_time": 1.3534489391304345e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_BottomHat2D_Constant_Padding/1",
+ "family_index": 14,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_BottomHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 753,
+ "real_time": 9.4793929877984096e-01,
+ "cpu_time": 9.4790966268260402e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Erode2D_Constant_Padding/1",
+ "family_index": 15,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Erode2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4882,
+ "real_time": 1.4292015489961513e-01,
+ "cpu_time": 1.4291047501024184e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Opening2D_Constant_Padding/1",
+ "family_index": 16,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Opening2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 3137,
+ "real_time": 2.2372837479151775e-01,
+ "cpu_time": 2.2371705769843761e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Closing2D_Constant_Padding/1",
+ "family_index": 17,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Closing2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 3091,
+ "real_time": 2.2564606758546227e-01,
+ "cpu_time": 2.2562969718537662e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_TopHat2D_Constant_Padding/1",
+ "family_index": 18,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_TopHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2642,
+ "real_time": 2.6471110273725063e-01,
+ "cpu_time": 2.6470357607872869e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_BottomHat2D_Constant_Padding/1",
+ "family_index": 19,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_BottomHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2626,
+ "real_time": 2.7250474842331923e-01,
+ "cpu_time": 2.7248917098248282e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_MorphGrad2D_Constant_Padding/1",
+ "family_index": 20,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2667,
+ "real_time": 2.6142182346135878e-01,
+ "cpu_time": 2.6141412335957992e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Dilate2D_Constant_Padding/1",
+ "family_index": 21,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Dilate2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4904,
+ "real_time": 1.4250244045870145e-01,
+ "cpu_time": 1.4249685929853156e-01,
+ "time_unit": "ms"
+ }
+ ]
+}
+
diff --git a/test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log b/test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log
new file mode 100644
index 00000000..97464ce6
--- /dev/null
+++ b/test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log
@@ -0,0 +1,56 @@
+2025-09-07T14:29:26+00:00
+Running ./bin/image-processing-benchmark
+Run on (24 X 5100 MHz CPU s)
+CPU Caches:
+ L1 Data 48 KiB (x12)
+ L1 Instruction 32 KiB (x12)
+ L2 Unified 1280 KiB (x12)
+ L3 Unified 30720 KiB (x1)
+Load Average: 3.03, 3.35, 4.31
+***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
+--------------------------------------------------------------------------------------------
+Benchmark Time CPU Iterations
+--------------------------------------------------------------------------------------------
+Eigen_Convolve2D/1 4.98 ms 4.98 ms 109
+MLIR_Conv2D/1 7.53 ms 7.53 ms 93
+Buddy_Conv2D/1 0.432 ms 0.432 ms 1613
+Buddy_Corr2D_Constant_Padding/1 1.12 ms 1.12 ms 621
+OpenCV_Filter2D_Constant_Padding/1 1.94 ms 1.94 ms 358
+Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.148 ms 0.148 ms 4730
+Buddy_Resize2D_Bilinear_Interpolation/1 0.273 ms 0.273 ms 2582
+OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 100345
+OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47937
+Buddy_Erosion2D_Constant_Padding/1 0.296 ms 0.296 ms 2390
+Buddy_Dilation2D_Constant_Padding/1 0.290 ms 0.290 ms 2256
+Buddy_Opening2D_Constant_Padding/1 0.672 ms 0.671 ms 882
+Buddy_Closing2D_Constant_Padding/1 0.611 ms 0.611 ms 1000
+Buddy_TopHat2D_Constant_Padding/1 1.35 ms 1.35 ms 460
+Buddy_BottomHat2D_Constant_Padding/1 0.948 ms 0.948 ms 753
+OpenCV_Erode2D_Constant_Padding/1 0.143 ms 0.143 ms 4882
+OpenCV_Opening2D_Constant_Padding/1 0.224 ms 0.224 ms 3137
+OpenCV_Closing2D_Constant_Padding/1 0.226 ms 0.226 ms 3091
+OpenCV_TopHat2D_Constant_Padding/1 0.265 ms 0.265 ms 2642
+OpenCV_BottomHat2D_Constant_Padding/1 0.273 ms 0.272 ms 2626
+OpenCV_MorphGrad2D_Constant_Padding/1 0.261 ms 0.261 ms 2667
+OpenCV_Dilate2D_Constant_Padding/1 0.143 ms 0.142 ms 4904
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
diff --git a/test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json
new file mode 100644
index 00000000..1dc67624
--- /dev/null
+++ b/test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json
@@ -0,0 +1,348 @@
+{
+ "context": {
+ "date": "2025-09-07T14:29:50+00:00",
+ "host_name": "4ed4bacfe45d",
+ "executable": "./bin/image-processing-benchmark",
+ "num_cpus": 24,
+ "mhz_per_cpu": 5100,
+ "cpu_scaling_enabled": true,
+ "caches": [
+ {
+ "type": "Data",
+ "level": 1,
+ "size": 49152,
+ "num_sharing": 2
+ },
+ {
+ "type": "Instruction",
+ "level": 1,
+ "size": 32768,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 2,
+ "size": 1310720,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 3,
+ "size": 31457280,
+ "num_sharing": 24
+ }
+ ],
+ "load_avg": [3.01562,3.32471,4.27539],
+ "library_build_type": "release"
+ },
+ "benchmarks": [
+ {
+ "name": "Eigen_Convolve2D/1",
+ "family_index": 0,
+ "per_family_instance_index": 0,
+ "run_name": "Eigen_Convolve2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 52,
+ "real_time": 1.1901946451801519e+01,
+ "cpu_time": 1.1901702288461539e+01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "MLIR_Conv2D/1",
+ "family_index": 1,
+ "per_family_instance_index": 0,
+ "run_name": "MLIR_Conv2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 23,
+ "real_time": 3.0438764263754305e+01,
+ "cpu_time": 3.0438311043478276e+01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Conv2D/1",
+ "family_index": 2,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Conv2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 637,
+ "real_time": 1.1142698910490871e+00,
+ "cpu_time": 1.1142385604395602e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Corr2D_Constant_Padding/1",
+ "family_index": 3,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Corr2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 375,
+ "real_time": 1.8523898224035900e+00,
+ "cpu_time": 1.8523744240000006e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Filter2D_Constant_Padding/1",
+ "family_index": 4,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Filter2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 249,
+ "real_time": 2.7968978965617568e+00,
+ "cpu_time": 2.7968237389558248e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "family_index": 5,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4716,
+ "real_time": 1.4841855141042753e-01,
+ "cpu_time": 1.4841650890585228e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Resize2D_Bilinear_Interpolation/1",
+ "family_index": 6,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2583,
+ "real_time": 2.7219368201838046e-01,
+ "cpu_time": 2.7218788850174197e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "family_index": 7,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 100958,
+ "real_time": 6.9158962019867246e-03,
+ "cpu_time": 6.9157541452881457e-03,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Resize2D_Bilinear_Interpolation/1",
+ "family_index": 8,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Resize2D_Bilinear_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 47295,
+ "real_time": 1.4739391594914103e-02,
+ "cpu_time": 1.4739279247277717e-02,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Erosion2D_Constant_Padding/1",
+ "family_index": 9,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Erosion2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1902,
+ "real_time": 3.5209773396279909e-01,
+ "cpu_time": 3.5206602313354285e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Dilation2D_Constant_Padding/1",
+ "family_index": 10,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Dilation2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2372,
+ "real_time": 2.8494088541446205e-01,
+ "cpu_time": 2.8493101180438485e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Opening2D_Constant_Padding/1",
+ "family_index": 11,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Opening2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1419,
+ "real_time": 5.2841257374836070e-01,
+ "cpu_time": 5.2840475828047939e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Closing2D_Constant_Padding/1",
+ "family_index": 12,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Closing2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1000,
+ "real_time": 5.3209472447633743e-01,
+ "cpu_time": 5.3207190200000021e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_TopHat2D_Constant_Padding/1",
+ "family_index": 13,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_TopHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 726,
+ "real_time": 9.2559484841902395e-01,
+ "cpu_time": 9.2558814462809835e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_BottomHat2D_Constant_Padding/1",
+ "family_index": 14,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_BottomHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 722,
+ "real_time": 9.3495421152861169e-01,
+ "cpu_time": 9.3492223961218790e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Erode2D_Constant_Padding/1",
+ "family_index": 15,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Erode2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4207,
+ "real_time": 1.6627396342446635e-01,
+ "cpu_time": 1.6627164606608016e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Opening2D_Constant_Padding/1",
+ "family_index": 16,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Opening2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2721,
+ "real_time": 2.5683478785980629e-01,
+ "cpu_time": 2.5682696435134150e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Closing2D_Constant_Padding/1",
+ "family_index": 17,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Closing2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2682,
+ "real_time": 2.5946399131699521e-01,
+ "cpu_time": 2.5946211446681705e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_TopHat2D_Constant_Padding/1",
+ "family_index": 18,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_TopHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2392,
+ "real_time": 2.9280804172407426e-01,
+ "cpu_time": 2.9279882984949784e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_BottomHat2D_Constant_Padding/1",
+ "family_index": 19,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_BottomHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2392,
+ "real_time": 2.9102603851951486e-01,
+ "cpu_time": 2.9099642642140494e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_MorphGrad2D_Constant_Padding/1",
+ "family_index": 20,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2374,
+ "real_time": 2.9361070696305303e-01,
+ "cpu_time": 2.9360214321819761e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Dilate2D_Constant_Padding/1",
+ "family_index": 21,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Dilate2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4301,
+ "real_time": 1.6287719699649639e-01,
+ "cpu_time": 1.6287602022785452e-01,
+ "time_unit": "ms"
+ }
+ ]
+}
diff --git a/test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log b/test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log
new file mode 100644
index 00000000..09a24a36
--- /dev/null
+++ b/test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log
@@ -0,0 +1,56 @@
+2025-09-07T14:29:50+00:00
+Running ./bin/image-processing-benchmark
+Run on (24 X 5100 MHz CPU s)
+CPU Caches:
+ L1 Data 48 KiB (x12)
+ L1 Instruction 32 KiB (x12)
+ L2 Unified 1280 KiB (x12)
+ L3 Unified 30720 KiB (x1)
+Load Average: 3.02, 3.32, 4.28
+***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
+--------------------------------------------------------------------------------------------
+Benchmark Time CPU Iterations
+--------------------------------------------------------------------------------------------
+Eigen_Convolve2D/1 11.9 ms 11.9 ms 52
+MLIR_Conv2D/1 30.4 ms 30.4 ms 23
+Buddy_Conv2D/1 1.11 ms 1.11 ms 637
+Buddy_Corr2D_Constant_Padding/1 1.85 ms 1.85 ms 375
+OpenCV_Filter2D_Constant_Padding/1 2.80 ms 2.80 ms 249
+Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.148 ms 0.148 ms 4716
+Buddy_Resize2D_Bilinear_Interpolation/1 0.272 ms 0.272 ms 2583
+OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 100958
+OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47295
+Buddy_Erosion2D_Constant_Padding/1 0.352 ms 0.352 ms 1902
+Buddy_Dilation2D_Constant_Padding/1 0.285 ms 0.285 ms 2372
+Buddy_Opening2D_Constant_Padding/1 0.528 ms 0.528 ms 1419
+Buddy_Closing2D_Constant_Padding/1 0.532 ms 0.532 ms 1000
+Buddy_TopHat2D_Constant_Padding/1 0.926 ms 0.926 ms 726
+Buddy_BottomHat2D_Constant_Padding/1 0.935 ms 0.935 ms 722
+OpenCV_Erode2D_Constant_Padding/1 0.166 ms 0.166 ms 4207
+OpenCV_Opening2D_Constant_Padding/1 0.257 ms 0.257 ms 2721
+OpenCV_Closing2D_Constant_Padding/1 0.259 ms 0.259 ms 2682
+OpenCV_TopHat2D_Constant_Padding/1 0.293 ms 0.293 ms 2392
+OpenCV_BottomHat2D_Constant_Padding/1 0.291 ms 0.291 ms 2392
+OpenCV_MorphGrad2D_Constant_Padding/1 0.294 ms 0.294 ms 2374
+OpenCV_Dilate2D_Constant_Padding/1 0.163 ms 0.163 ms 4301
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
diff --git a/test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json
new file mode 100644
index 00000000..07ef45e8
--- /dev/null
+++ b/test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json
@@ -0,0 +1,348 @@
+{
+ "context": {
+ "date": "2025-09-07T14:30:14+00:00",
+ "host_name": "4ed4bacfe45d",
+ "executable": "./bin/image-processing-benchmark",
+ "num_cpus": 24,
+ "mhz_per_cpu": 5100,
+ "cpu_scaling_enabled": true,
+ "caches": [
+ {
+ "type": "Data",
+ "level": 1,
+ "size": 49152,
+ "num_sharing": 2
+ },
+ {
+ "type": "Instruction",
+ "level": 1,
+ "size": 32768,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 2,
+ "size": 1310720,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 3,
+ "size": 31457280,
+ "num_sharing": 24
+ }
+ ],
+ "load_avg": [3.08984,3.31445,4.24609],
+ "library_build_type": "release"
+ },
+ "benchmarks": [
+ {
+ "name": "Eigen_Convolve2D/1",
+ "family_index": 0,
+ "per_family_instance_index": 0,
+ "run_name": "Eigen_Convolve2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 58,
+ "real_time": 1.1998113887063388e+01,
+ "cpu_time": 1.1997172689655171e+01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "MLIR_Conv2D/1",
+ "family_index": 1,
+ "per_family_instance_index": 0,
+ "run_name": "MLIR_Conv2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 23,
+ "real_time": 3.0430977273246516e+01,
+ "cpu_time": 3.0430230913043474e+01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Conv2D/1",
+ "family_index": 2,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Conv2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 694,
+ "real_time": 9.3945750284950735e-01,
+ "cpu_time": 9.3939238472622510e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Corr2D_Constant_Padding/1",
+ "family_index": 3,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Corr2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 379,
+ "real_time": 1.8339350839246231e+00,
+ "cpu_time": 1.8338942585751978e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Filter2D_Constant_Padding/1",
+ "family_index": 4,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Filter2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 249,
+ "real_time": 2.7970489727924148e+00,
+ "cpu_time": 2.7968971004016070e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "family_index": 5,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4735,
+ "real_time": 1.4771520226029433e-01,
+ "cpu_time": 1.4771250242872222e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Resize2D_Bilinear_Interpolation/1",
+ "family_index": 6,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2581,
+ "real_time": 2.7436091312370020e-01,
+ "cpu_time": 2.7433653428903537e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "family_index": 7,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 101032,
+ "real_time": 6.9237584291694564e-03,
+ "cpu_time": 6.9233851156069301e-03,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Resize2D_Bilinear_Interpolation/1",
+ "family_index": 8,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Resize2D_Bilinear_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 47897,
+ "real_time": 1.4611094101790878e-02,
+ "cpu_time": 1.4609948556276994e-02,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Erosion2D_Constant_Padding/1",
+ "family_index": 9,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Erosion2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2315,
+ "real_time": 3.1752810043083668e-01,
+ "cpu_time": 3.1751235248380194e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Dilation2D_Constant_Padding/1",
+ "family_index": 10,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Dilation2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1991,
+ "real_time": 3.0030406326699172e-01,
+ "cpu_time": 3.0026843294826744e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Opening2D_Constant_Padding/1",
+ "family_index": 11,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Opening2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1162,
+ "real_time": 5.2669528301119184e-01,
+ "cpu_time": 5.2667063166953465e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Closing2D_Constant_Padding/1",
+ "family_index": 12,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Closing2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1147,
+ "real_time": 5.8120846059806675e-01,
+ "cpu_time": 5.8116856669572725e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_TopHat2D_Constant_Padding/1",
+ "family_index": 13,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_TopHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 742,
+ "real_time": 9.3807371438674203e-01,
+ "cpu_time": 9.3802853369272166e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_BottomHat2D_Constant_Padding/1",
+ "family_index": 14,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_BottomHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 731,
+ "real_time": 9.1354743094679103e-01,
+ "cpu_time": 9.1349827770177838e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Erode2D_Constant_Padding/1",
+ "family_index": 15,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Erode2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4901,
+ "real_time": 1.4285111738647449e-01,
+ "cpu_time": 1.4284396123240140e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Opening2D_Constant_Padding/1",
+ "family_index": 16,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Opening2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2990,
+ "real_time": 2.3436977786563312e-01,
+ "cpu_time": 2.3436707424749126e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Closing2D_Constant_Padding/1",
+ "family_index": 17,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Closing2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2977,
+ "real_time": 2.3498714375624050e-01,
+ "cpu_time": 2.3497187302653738e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_TopHat2D_Constant_Padding/1",
+ "family_index": 18,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_TopHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2568,
+ "real_time": 2.7303778743400381e-01,
+ "cpu_time": 2.7303231347352053e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_BottomHat2D_Constant_Padding/1",
+ "family_index": 19,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_BottomHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2572,
+ "real_time": 2.6648068832433430e-01,
+ "cpu_time": 2.6646258125972005e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_MorphGrad2D_Constant_Padding/1",
+ "family_index": 20,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2624,
+ "real_time": 2.6757532884026081e-01,
+ "cpu_time": 2.6757154878048883e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Dilate2D_Constant_Padding/1",
+ "family_index": 21,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Dilate2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4938,
+ "real_time": 1.4144324261126320e-01,
+ "cpu_time": 1.4143231227217509e-01,
+ "time_unit": "ms"
+ }
+ ]
+}
diff --git a/test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log b/test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log
new file mode 100644
index 00000000..e95c5e72
--- /dev/null
+++ b/test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log
@@ -0,0 +1,57 @@
+2025-09-07T14:30:14+00:00
+Running ./bin/image-processing-benchmark
+Run on (24 X 5100 MHz CPU s)
+CPU Caches:
+ L1 Data 48 KiB (x12)
+ L1 Instruction 32 KiB (x12)
+ L2 Unified 1280 KiB (x12)
+ L3 Unified 30720 KiB (x1)
+Load Average: 3.09, 3.31, 4.25
+***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
+--------------------------------------------------------------------------------------------
+Benchmark Time CPU Iterations
+--------------------------------------------------------------------------------------------
+Eigen_Convolve2D/1 12.0 ms 12.0 ms 58
+MLIR_Conv2D/1 30.4 ms 30.4 ms 23
+Buddy_Conv2D/1 0.939 ms 0.939 ms 694
+Buddy_Corr2D_Constant_Padding/1 1.83 ms 1.83 ms 379
+OpenCV_Filter2D_Constant_Padding/1 2.80 ms 2.80 ms 249
+Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.148 ms 0.148 ms 4735
+Buddy_Resize2D_Bilinear_Interpolation/1 0.274 ms 0.274 ms 2581
+OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 101032
+OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47897
+Buddy_Erosion2D_Constant_Padding/1 0.318 ms 0.318 ms 2315
+Buddy_Dilation2D_Constant_Padding/1 0.300 ms 0.300 ms 1991
+Buddy_Opening2D_Constant_Padding/1 0.527 ms 0.527 ms 1162
+Buddy_Closing2D_Constant_Padding/1 0.581 ms 0.581 ms 1147
+Buddy_TopHat2D_Constant_Padding/1 0.938 ms 0.938 ms 742
+Buddy_BottomHat2D_Constant_Padding/1 0.914 ms 0.913 ms 731
+OpenCV_Erode2D_Constant_Padding/1 0.143 ms 0.143 ms 4901
+OpenCV_Opening2D_Constant_Padding/1 0.234 ms 0.234 ms 2990
+OpenCV_Closing2D_Constant_Padding/1 0.235 ms 0.235 ms 2977
+OpenCV_TopHat2D_Constant_Padding/1 0.273 ms 0.273 ms 2568
+OpenCV_BottomHat2D_Constant_Padding/1 0.266 ms 0.266 ms 2572
+OpenCV_MorphGrad2D_Constant_Padding/1 0.268 ms 0.268 ms 2624
+OpenCV_Dilate2D_Constant_Padding/1 0.141 ms 0.141 ms 4938
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+ERROR: Can't save PNG file.
+ERROR: Can't save PNG file.
+ERROR: Can't save PNG file.
+ERROR: Can't save PNG file.
+ERROR: Can't save PNG file.
+ERROR: Can't save PNG file.
+ERROR: Can't save PNG file.
+Exception converting image to PNG format.
+ERROR: Can't save PNG file.
diff --git a/test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json
new file mode 100644
index 00000000..97022f6f
--- /dev/null
+++ b/test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json
@@ -0,0 +1,348 @@
+{
+ "context": {
+ "date": "2025-09-07T14:25:01+00:00",
+ "host_name": "4ed4bacfe45d",
+ "executable": "./bin/image-processing-benchmark",
+ "num_cpus": 24,
+ "mhz_per_cpu": 5100,
+ "cpu_scaling_enabled": true,
+ "caches": [
+ {
+ "type": "Data",
+ "level": 1,
+ "size": 49152,
+ "num_sharing": 2
+ },
+ {
+ "type": "Instruction",
+ "level": 1,
+ "size": 32768,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 2,
+ "size": 1310720,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 3,
+ "size": 31457280,
+ "num_sharing": 24
+ }
+ ],
+ "load_avg": [4.38672,3.83301,4.74805],
+ "library_build_type": "release"
+ },
+ "benchmarks": [
+ {
+ "name": "Eigen_Convolve2D/1",
+ "family_index": 0,
+ "per_family_instance_index": 0,
+ "run_name": "Eigen_Convolve2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 136,
+ "real_time": 5.0944548702853565e+00,
+ "cpu_time": 5.0941901617647067e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "MLIR_Conv2D/1",
+ "family_index": 1,
+ "per_family_instance_index": 0,
+ "run_name": "MLIR_Conv2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 92,
+ "real_time": 7.5879288108452503e+00,
+ "cpu_time": 7.5877727717391323e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Conv2D/1",
+ "family_index": 2,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Conv2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2126,
+ "real_time": 3.2648893102989485e-01,
+ "cpu_time": 3.2648295437441222e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Corr2D_Constant_Padding/1",
+ "family_index": 3,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Corr2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 810,
+ "real_time": 8.6173117160797119e-01,
+ "cpu_time": 8.6171702345678991e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Filter2D_Constant_Padding/1",
+ "family_index": 4,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Filter2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 532,
+ "real_time": 1.3098518987347310e+00,
+ "cpu_time": 1.3098337011278192e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "family_index": 5,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4693,
+ "real_time": 1.4864658349845142e-01,
+ "cpu_time": 1.4864443596846369e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Resize2D_Bilinear_Interpolation/1",
+ "family_index": 6,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2574,
+ "real_time": 2.7232925843942418e-01,
+ "cpu_time": 2.7232468376068386e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "family_index": 7,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 101147,
+ "real_time": 6.9248195898291617e-03,
+ "cpu_time": 6.9247274758519840e-03,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Resize2D_Bilinear_Interpolation/1",
+ "family_index": 8,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Resize2D_Bilinear_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 47876,
+ "real_time": 1.4619754012793923e-02,
+ "cpu_time": 1.4619547852786375e-02,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Erosion2D_Constant_Padding/1",
+ "family_index": 9,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Erosion2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1964,
+ "real_time": 3.4232623116426214e-01,
+ "cpu_time": 3.4231908757637480e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Dilation2D_Constant_Padding/1",
+ "family_index": 10,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Dilation2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1987,
+ "real_time": 3.5313764707971351e-01,
+ "cpu_time": 3.5311560694514321e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Opening2D_Constant_Padding/1",
+ "family_index": 11,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Opening2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1000,
+ "real_time": 7.3933277651667595e-01,
+ "cpu_time": 7.3929136599999978e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Closing2D_Constant_Padding/1",
+ "family_index": 12,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Closing2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1338,
+ "real_time": 6.8773021879752116e-01,
+ "cpu_time": 6.8761599701046450e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_TopHat2D_Constant_Padding/1",
+ "family_index": 13,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_TopHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 449,
+ "real_time": 1.3737666746290329e+00,
+ "cpu_time": 1.3737399821826268e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_BottomHat2D_Constant_Padding/1",
+ "family_index": 14,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_BottomHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 451,
+ "real_time": 1.1508310349976145e+00,
+ "cpu_time": 1.1508074767184044e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Erode2D_Constant_Padding/1",
+ "family_index": 15,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Erode2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4246,
+ "real_time": 1.6527650926088422e-01,
+ "cpu_time": 1.6527523033443248e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Opening2D_Constant_Padding/1",
+ "family_index": 16,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Opening2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2717,
+ "real_time": 2.5677720045709945e-01,
+ "cpu_time": 2.5677142694147959e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Closing2D_Constant_Padding/1",
+ "family_index": 17,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Closing2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2783,
+ "real_time": 2.5039162999470799e-01,
+ "cpu_time": 2.5038794178943619e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_TopHat2D_Constant_Padding/1",
+ "family_index": 18,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_TopHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2439,
+ "real_time": 2.8731209325614449e-01,
+ "cpu_time": 2.8730276875768884e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_BottomHat2D_Constant_Padding/1",
+ "family_index": 19,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_BottomHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2429,
+ "real_time": 2.8720653292670473e-01,
+ "cpu_time": 2.8719969699464831e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_MorphGrad2D_Constant_Padding/1",
+ "family_index": 20,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2460,
+ "real_time": 2.8502542555816773e-01,
+ "cpu_time": 2.8502001626016238e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Dilate2D_Constant_Padding/1",
+ "family_index": 21,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Dilate2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4294,
+ "real_time": 1.6273279480452199e-01,
+ "cpu_time": 1.6272675011644155e-01,
+ "time_unit": "ms"
+ }
+ ]
+}
diff --git a/test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log b/test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log
new file mode 100644
index 00000000..d7b44d35
--- /dev/null
+++ b/test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log
@@ -0,0 +1,56 @@
+2025-09-07T14:25:01+00:00
+Running ./bin/image-processing-benchmark
+Run on (24 X 5100 MHz CPU s)
+CPU Caches:
+ L1 Data 48 KiB (x12)
+ L1 Instruction 32 KiB (x12)
+ L2 Unified 1280 KiB (x12)
+ L3 Unified 30720 KiB (x1)
+Load Average: 4.39, 3.83, 4.75
+***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
+--------------------------------------------------------------------------------------------
+Benchmark Time CPU Iterations
+--------------------------------------------------------------------------------------------
+Eigen_Convolve2D/1 5.09 ms 5.09 ms 136
+MLIR_Conv2D/1 7.59 ms 7.59 ms 92
+Buddy_Conv2D/1 0.326 ms 0.326 ms 2126
+Buddy_Corr2D_Constant_Padding/1 0.862 ms 0.862 ms 810
+OpenCV_Filter2D_Constant_Padding/1 1.31 ms 1.31 ms 532
+Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.149 ms 0.149 ms 4693
+Buddy_Resize2D_Bilinear_Interpolation/1 0.272 ms 0.272 ms 2574
+OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 101147
+OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47876
+Buddy_Erosion2D_Constant_Padding/1 0.342 ms 0.342 ms 1964
+Buddy_Dilation2D_Constant_Padding/1 0.353 ms 0.353 ms 1987
+Buddy_Opening2D_Constant_Padding/1 0.739 ms 0.739 ms 1000
+Buddy_Closing2D_Constant_Padding/1 0.688 ms 0.688 ms 1338
+Buddy_TopHat2D_Constant_Padding/1 1.37 ms 1.37 ms 449
+Buddy_BottomHat2D_Constant_Padding/1 1.15 ms 1.15 ms 451
+OpenCV_Erode2D_Constant_Padding/1 0.165 ms 0.165 ms 4246
+OpenCV_Opening2D_Constant_Padding/1 0.257 ms 0.257 ms 2717
+OpenCV_Closing2D_Constant_Padding/1 0.250 ms 0.250 ms 2783
+OpenCV_TopHat2D_Constant_Padding/1 0.287 ms 0.287 ms 2439
+OpenCV_BottomHat2D_Constant_Padding/1 0.287 ms 0.287 ms 2429
+OpenCV_MorphGrad2D_Constant_Padding/1 0.285 ms 0.285 ms 2460
+OpenCV_Dilate2D_Constant_Padding/1 0.163 ms 0.163 ms 4294
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
diff --git a/test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json
new file mode 100644
index 00000000..16b026ca
--- /dev/null
+++ b/test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json
@@ -0,0 +1,348 @@
+{
+ "context": {
+ "date": "2025-09-07T14:25:25+00:00",
+ "host_name": "4ed4bacfe45d",
+ "executable": "./bin/image-processing-benchmark",
+ "num_cpus": 24,
+ "mhz_per_cpu": 5100,
+ "cpu_scaling_enabled": true,
+ "caches": [
+ {
+ "type": "Data",
+ "level": 1,
+ "size": 49152,
+ "num_sharing": 2
+ },
+ {
+ "type": "Instruction",
+ "level": 1,
+ "size": 32768,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 2,
+ "size": 1310720,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 3,
+ "size": 31457280,
+ "num_sharing": 24
+ }
+ ],
+ "load_avg": [3.9126,3.76514,4.7002],
+ "library_build_type": "release"
+ },
+ "benchmarks": [
+ {
+ "name": "Eigen_Convolve2D/1",
+ "family_index": 0,
+ "per_family_instance_index": 0,
+ "run_name": "Eigen_Convolve2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 140,
+ "real_time": 4.9801771395972798e+00,
+ "cpu_time": 4.9799727642857139e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "MLIR_Conv2D/1",
+ "family_index": 1,
+ "per_family_instance_index": 0,
+ "run_name": "MLIR_Conv2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 92,
+ "real_time": 7.5637138570132461e+00,
+ "cpu_time": 7.5633655869565199e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Conv2D/1",
+ "family_index": 2,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Conv2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1928,
+ "real_time": 3.6541006771298862e-01,
+ "cpu_time": 3.6539922977178435e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Corr2D_Constant_Padding/1",
+ "family_index": 3,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Corr2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 814,
+ "real_time": 8.6126693453484149e-01,
+ "cpu_time": 8.6124013022113044e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Filter2D_Constant_Padding/1",
+ "family_index": 4,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Filter2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 533,
+ "real_time": 1.3084959091992583e+00,
+ "cpu_time": 1.3084598105065661e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "family_index": 5,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4720,
+ "real_time": 1.4866878815247850e-01,
+ "cpu_time": 1.4866191355932201e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Resize2D_Bilinear_Interpolation/1",
+ "family_index": 6,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2570,
+ "real_time": 2.7357344062643757e-01,
+ "cpu_time": 2.7355898560311276e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "family_index": 7,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 100923,
+ "real_time": 6.9336138939047895e-03,
+ "cpu_time": 6.9332814323791450e-03,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Resize2D_Bilinear_Interpolation/1",
+ "family_index": 8,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Resize2D_Bilinear_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 47371,
+ "real_time": 1.4785069694910813e-02,
+ "cpu_time": 1.4784267484325859e-02,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Erosion2D_Constant_Padding/1",
+ "family_index": 9,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Erosion2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1930,
+ "real_time": 3.5314525771944016e-01,
+ "cpu_time": 3.5312864974093228e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Dilation2D_Constant_Padding/1",
+ "family_index": 10,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Dilation2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2161,
+ "real_time": 3.0299739835439926e-01,
+ "cpu_time": 3.0298353354928237e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Opening2D_Constant_Padding/1",
+ "family_index": 11,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Opening2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1325,
+ "real_time": 5.3083810322689562e-01,
+ "cpu_time": 5.3080599698113229e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Closing2D_Constant_Padding/1",
+ "family_index": 12,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Closing2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1097,
+ "real_time": 5.6190424706572928e-01,
+ "cpu_time": 5.6186523518687359e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_TopHat2D_Constant_Padding/1",
+ "family_index": 13,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_TopHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 737,
+ "real_time": 9.5735288135393992e-01,
+ "cpu_time": 9.5733702578019098e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_BottomHat2D_Constant_Padding/1",
+ "family_index": 14,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_BottomHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 710,
+ "real_time": 9.2390114047997429e-01,
+ "cpu_time": 9.2382159295774735e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Erode2D_Constant_Padding/1",
+ "family_index": 15,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Erode2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4222,
+ "real_time": 1.6582871410926897e-01,
+ "cpu_time": 1.6582006608242525e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Opening2D_Constant_Padding/1",
+ "family_index": 16,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Opening2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2852,
+ "real_time": 2.4506063847103762e-01,
+ "cpu_time": 2.4504969950911681e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Closing2D_Constant_Padding/1",
+ "family_index": 17,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Closing2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2864,
+ "real_time": 2.4439445982931712e-01,
+ "cpu_time": 2.4438653247206729e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_TopHat2D_Constant_Padding/1",
+ "family_index": 18,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_TopHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2570,
+ "real_time": 2.7270443331638661e-01,
+ "cpu_time": 2.7268578988326853e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_BottomHat2D_Constant_Padding/1",
+ "family_index": 19,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_BottomHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2597,
+ "real_time": 2.7151760983384476e-01,
+ "cpu_time": 2.7150931959953850e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_MorphGrad2D_Constant_Padding/1",
+ "family_index": 20,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2439,
+ "real_time": 2.8630925549401176e-01,
+ "cpu_time": 2.8630299056990677e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Dilate2D_Constant_Padding/1",
+ "family_index": 21,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Dilate2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4308,
+ "real_time": 1.6245689889902082e-01,
+ "cpu_time": 1.6245064832869063e-01,
+ "time_unit": "ms"
+ }
+ ]
+}
diff --git a/test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log b/test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log
new file mode 100644
index 00000000..a4f8253f
--- /dev/null
+++ b/test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log
@@ -0,0 +1,56 @@
+2025-09-07T14:25:25+00:00
+Running ./bin/image-processing-benchmark
+Run on (24 X 5100 MHz CPU s)
+CPU Caches:
+ L1 Data 48 KiB (x12)
+ L1 Instruction 32 KiB (x12)
+ L2 Unified 1280 KiB (x12)
+ L3 Unified 30720 KiB (x1)
+Load Average: 3.91, 3.77, 4.70
+***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
+--------------------------------------------------------------------------------------------
+Benchmark Time CPU Iterations
+--------------------------------------------------------------------------------------------
+Eigen_Convolve2D/1 4.98 ms 4.98 ms 140
+MLIR_Conv2D/1 7.56 ms 7.56 ms 92
+Buddy_Conv2D/1 0.365 ms 0.365 ms 1928
+Buddy_Corr2D_Constant_Padding/1 0.861 ms 0.861 ms 814
+OpenCV_Filter2D_Constant_Padding/1 1.31 ms 1.31 ms 533
+Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.149 ms 0.149 ms 4720
+Buddy_Resize2D_Bilinear_Interpolation/1 0.274 ms 0.274 ms 2570
+OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 100923
+OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47371
+Buddy_Erosion2D_Constant_Padding/1 0.353 ms 0.353 ms 1930
+Buddy_Dilation2D_Constant_Padding/1 0.303 ms 0.303 ms 2161
+Buddy_Opening2D_Constant_Padding/1 0.531 ms 0.531 ms 1325
+Buddy_Closing2D_Constant_Padding/1 0.562 ms 0.562 ms 1097
+Buddy_TopHat2D_Constant_Padding/1 0.957 ms 0.957 ms 737
+Buddy_BottomHat2D_Constant_Padding/1 0.924 ms 0.924 ms 710
+OpenCV_Erode2D_Constant_Padding/1 0.166 ms 0.166 ms 4222
+OpenCV_Opening2D_Constant_Padding/1 0.245 ms 0.245 ms 2852
+OpenCV_Closing2D_Constant_Padding/1 0.244 ms 0.244 ms 2864
+OpenCV_TopHat2D_Constant_Padding/1 0.273 ms 0.273 ms 2570
+OpenCV_BottomHat2D_Constant_Padding/1 0.272 ms 0.272 ms 2597
+OpenCV_MorphGrad2D_Constant_Padding/1 0.286 ms 0.286 ms 2439
+OpenCV_Dilate2D_Constant_Padding/1 0.162 ms 0.162 ms 4308
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json
new file mode 100644
index 00000000..583475bb
--- /dev/null
+++ b/test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json
@@ -0,0 +1,348 @@
+{
+ "context": {
+ "date": "2025-09-07T14:25:50+00:00",
+ "host_name": "4ed4bacfe45d",
+ "executable": "./bin/image-processing-benchmark",
+ "num_cpus": 24,
+ "mhz_per_cpu": 5100,
+ "cpu_scaling_enabled": true,
+ "caches": [
+ {
+ "type": "Data",
+ "level": 1,
+ "size": 49152,
+ "num_sharing": 2
+ },
+ {
+ "type": "Instruction",
+ "level": 1,
+ "size": 32768,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 2,
+ "size": 1310720,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 3,
+ "size": 31457280,
+ "num_sharing": 24
+ }
+ ],
+ "load_avg": [3.6001,3.70215,4.65381],
+ "library_build_type": "release"
+ },
+ "benchmarks": [
+ {
+ "name": "Eigen_Convolve2D/1",
+ "family_index": 0,
+ "per_family_instance_index": 0,
+ "run_name": "Eigen_Convolve2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 137,
+ "real_time": 5.0967205723706819e+00,
+ "cpu_time": 5.0964819489051107e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "MLIR_Conv2D/1",
+ "family_index": 1,
+ "per_family_instance_index": 0,
+ "run_name": "MLIR_Conv2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 91,
+ "real_time": 7.6128631257093868e+00,
+ "cpu_time": 7.6126163516483496e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Conv2D/1",
+ "family_index": 2,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Conv2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1997,
+ "real_time": 3.4362500795556356e-01,
+ "cpu_time": 3.4358477866800197e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Corr2D_Constant_Padding/1",
+ "family_index": 3,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Corr2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 805,
+ "real_time": 8.6478873717118498e-01,
+ "cpu_time": 8.6472154285714264e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Filter2D_Constant_Padding/1",
+ "family_index": 4,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Filter2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 529,
+ "real_time": 1.3110746324963740e+00,
+ "cpu_time": 1.3110105841209827e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "family_index": 5,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4716,
+ "real_time": 1.5283490043762279e-01,
+ "cpu_time": 1.5283251993214594e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Resize2D_Bilinear_Interpolation/1",
+ "family_index": 6,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2554,
+ "real_time": 2.7269076267162523e-01,
+ "cpu_time": 2.7268682106499587e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "family_index": 7,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 101059,
+ "real_time": 6.9261688433581575e-03,
+ "cpu_time": 6.9260731255998999e-03,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Resize2D_Bilinear_Interpolation/1",
+ "family_index": 8,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Resize2D_Bilinear_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 47776,
+ "real_time": 1.4638210093503979e-02,
+ "cpu_time": 1.4638031208137982e-02,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Erosion2D_Constant_Padding/1",
+ "family_index": 9,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Erosion2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1929,
+ "real_time": 2.9913433178092852e-01,
+ "cpu_time": 2.9912855054432330e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Dilation2D_Constant_Padding/1",
+ "family_index": 10,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Dilation2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2459,
+ "real_time": 3.0520153711025383e-01,
+ "cpu_time": 3.0519616063440402e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Opening2D_Constant_Padding/1",
+ "family_index": 11,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Opening2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1338,
+ "real_time": 5.4847415096734853e-01,
+ "cpu_time": 5.4846282810164348e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Closing2D_Constant_Padding/1",
+ "family_index": 12,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Closing2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1000,
+ "real_time": 5.5285461992025375e-01,
+ "cpu_time": 5.5284405199999931e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_TopHat2D_Constant_Padding/1",
+ "family_index": 13,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_TopHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 716,
+ "real_time": 9.7495791445064806e-01,
+ "cpu_time": 9.7494059916201126e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_BottomHat2D_Constant_Padding/1",
+ "family_index": 14,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_BottomHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 692,
+ "real_time": 9.6457034909311745e-01,
+ "cpu_time": 9.6455439884393179e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Erode2D_Constant_Padding/1",
+ "family_index": 15,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Erode2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4887,
+ "real_time": 1.4370574812374892e-01,
+ "cpu_time": 1.4370325148352775e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Opening2D_Constant_Padding/1",
+ "family_index": 16,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Opening2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 3079,
+ "real_time": 2.2928564821059302e-01,
+ "cpu_time": 2.2928312731406281e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Closing2D_Constant_Padding/1",
+ "family_index": 17,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Closing2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 3082,
+ "real_time": 2.2956116836926599e-01,
+ "cpu_time": 2.2955781213497742e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_TopHat2D_Constant_Padding/1",
+ "family_index": 18,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_TopHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2636,
+ "real_time": 2.6566850951390852e-01,
+ "cpu_time": 2.6566524696509775e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_BottomHat2D_Constant_Padding/1",
+ "family_index": 19,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_BottomHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2623,
+ "real_time": 2.6680013712380524e-01,
+ "cpu_time": 2.6679694357605821e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_MorphGrad2D_Constant_Padding/1",
+ "family_index": 20,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2663,
+ "real_time": 2.6179852199831988e-01,
+ "cpu_time": 2.6179651633496065e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Dilate2D_Constant_Padding/1",
+ "family_index": 21,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Dilate2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4965,
+ "real_time": 1.4100358385333842e-01,
+ "cpu_time": 1.4100249889224539e-01,
+ "time_unit": "ms"
+ }
+ ]
+}
diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log b/test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log
new file mode 100644
index 00000000..96c6d225
--- /dev/null
+++ b/test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log
@@ -0,0 +1,56 @@
+2025-09-07T14:25:50+00:00
+Running ./bin/image-processing-benchmark
+Run on (24 X 5100 MHz CPU s)
+CPU Caches:
+ L1 Data 48 KiB (x12)
+ L1 Instruction 32 KiB (x12)
+ L2 Unified 1280 KiB (x12)
+ L3 Unified 30720 KiB (x1)
+Load Average: 3.60, 3.70, 4.65
+***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
+--------------------------------------------------------------------------------------------
+Benchmark Time CPU Iterations
+--------------------------------------------------------------------------------------------
+Eigen_Convolve2D/1 5.10 ms 5.10 ms 137
+MLIR_Conv2D/1 7.61 ms 7.61 ms 91
+Buddy_Conv2D/1 0.344 ms 0.344 ms 1997
+Buddy_Corr2D_Constant_Padding/1 0.865 ms 0.865 ms 805
+OpenCV_Filter2D_Constant_Padding/1 1.31 ms 1.31 ms 529
+Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.153 ms 0.153 ms 4716
+Buddy_Resize2D_Bilinear_Interpolation/1 0.273 ms 0.273 ms 2554
+OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 101059
+OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47776
+Buddy_Erosion2D_Constant_Padding/1 0.299 ms 0.299 ms 1929
+Buddy_Dilation2D_Constant_Padding/1 0.305 ms 0.305 ms 2459
+Buddy_Opening2D_Constant_Padding/1 0.548 ms 0.548 ms 1338
+Buddy_Closing2D_Constant_Padding/1 0.553 ms 0.553 ms 1000
+Buddy_TopHat2D_Constant_Padding/1 0.975 ms 0.975 ms 716
+Buddy_BottomHat2D_Constant_Padding/1 0.965 ms 0.965 ms 692
+OpenCV_Erode2D_Constant_Padding/1 0.144 ms 0.144 ms 4887
+OpenCV_Opening2D_Constant_Padding/1 0.229 ms 0.229 ms 3079
+OpenCV_Closing2D_Constant_Padding/1 0.230 ms 0.230 ms 3082
+OpenCV_TopHat2D_Constant_Padding/1 0.266 ms 0.266 ms 2636
+OpenCV_BottomHat2D_Constant_Padding/1 0.267 ms 0.267 ms 2623
+OpenCV_MorphGrad2D_Constant_Padding/1 0.262 ms 0.262 ms 2663
+OpenCV_Dilate2D_Constant_Padding/1 0.141 ms 0.141 ms 4965
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json
new file mode 100644
index 00000000..b1ffde62
--- /dev/null
+++ b/test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json
@@ -0,0 +1,348 @@
+{
+ "context": {
+ "date": "2025-09-07T14:26:14+00:00",
+ "host_name": "4ed4bacfe45d",
+ "executable": "./bin/image-processing-benchmark",
+ "num_cpus": 24,
+ "mhz_per_cpu": 5100,
+ "cpu_scaling_enabled": true,
+ "caches": [
+ {
+ "type": "Data",
+ "level": 1,
+ "size": 49152,
+ "num_sharing": 2
+ },
+ {
+ "type": "Instruction",
+ "level": 1,
+ "size": 32768,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 2,
+ "size": 1310720,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 3,
+ "size": 31457280,
+ "num_sharing": 24
+ }
+ ],
+ "load_avg": [3.39404,3.64453,4.60889],
+ "library_build_type": "release"
+ },
+ "benchmarks": [
+ {
+ "name": "Eigen_Convolve2D/1",
+ "family_index": 0,
+ "per_family_instance_index": 0,
+ "run_name": "Eigen_Convolve2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 121,
+ "real_time": 5.1075547071527847e+00,
+ "cpu_time": 5.1073665289256196e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "MLIR_Conv2D/1",
+ "family_index": 1,
+ "per_family_instance_index": 0,
+ "run_name": "MLIR_Conv2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 93,
+ "real_time": 7.6060896358823262e+00,
+ "cpu_time": 7.6059665161290306e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Conv2D/1",
+ "family_index": 2,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Conv2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2123,
+ "real_time": 3.2846411506746714e-01,
+ "cpu_time": 3.2843848233631667e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Corr2D_Constant_Padding/1",
+ "family_index": 3,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Corr2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 817,
+ "real_time": 8.6353970019003168e-01,
+ "cpu_time": 8.6350830844553239e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Filter2D_Constant_Padding/1",
+ "family_index": 4,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Filter2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 533,
+ "real_time": 1.3113399062438484e+00,
+ "cpu_time": 1.3112924840525322e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "family_index": 5,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4732,
+ "real_time": 1.4925754329131402e-01,
+ "cpu_time": 1.4925238377007605e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Resize2D_Bilinear_Interpolation/1",
+ "family_index": 6,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2557,
+ "real_time": 2.7432158665121836e-01,
+ "cpu_time": 2.7429795150567055e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "family_index": 7,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 101042,
+ "real_time": 6.9291528471928992e-03,
+ "cpu_time": 6.9288272599513055e-03,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Resize2D_Bilinear_Interpolation/1",
+ "family_index": 8,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Resize2D_Bilinear_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 47868,
+ "real_time": 1.4627723422845006e-02,
+ "cpu_time": 1.4627092149243754e-02,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Erosion2D_Constant_Padding/1",
+ "family_index": 9,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Erosion2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2430,
+ "real_time": 2.8533444973666972e-01,
+ "cpu_time": 2.8531405061728438e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Dilation2D_Constant_Padding/1",
+ "family_index": 10,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Dilation2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2094,
+ "real_time": 3.1473711951357358e-01,
+ "cpu_time": 3.1470967144221584e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Opening2D_Constant_Padding/1",
+ "family_index": 11,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Opening2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1014,
+ "real_time": 5.7866645120892535e-01,
+ "cpu_time": 5.7864507199211168e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Closing2D_Constant_Padding/1",
+ "family_index": 12,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Closing2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1111,
+ "real_time": 5.6143095641389396e-01,
+ "cpu_time": 5.6142596129612876e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_TopHat2D_Constant_Padding/1",
+ "family_index": 13,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_TopHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 716,
+ "real_time": 9.7362239787864946e-01,
+ "cpu_time": 9.7356560335195652e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_BottomHat2D_Constant_Padding/1",
+ "family_index": 14,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_BottomHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 685,
+ "real_time": 9.8786628159293288e-01,
+ "cpu_time": 9.8785897518248145e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Erode2D_Constant_Padding/1",
+ "family_index": 15,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Erode2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4300,
+ "real_time": 1.4379027624462926e-01,
+ "cpu_time": 1.4378212465116308e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Opening2D_Constant_Padding/1",
+ "family_index": 16,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Opening2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 3063,
+ "real_time": 2.2853155405428766e-01,
+ "cpu_time": 2.2852091609533151e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Closing2D_Constant_Padding/1",
+ "family_index": 17,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Closing2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 3071,
+ "real_time": 2.2941528513508408e-01,
+ "cpu_time": 2.2940828427222390e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_TopHat2D_Constant_Padding/1",
+ "family_index": 18,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_TopHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2523,
+ "real_time": 2.6365732489906035e-01,
+ "cpu_time": 2.6365356282203672e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_BottomHat2D_Constant_Padding/1",
+ "family_index": 19,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_BottomHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2656,
+ "real_time": 2.6352780433483869e-01,
+ "cpu_time": 2.6352464533132480e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_MorphGrad2D_Constant_Padding/1",
+ "family_index": 20,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2657,
+ "real_time": 2.6354999174558447e-01,
+ "cpu_time": 2.6354654459917209e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Dilate2D_Constant_Padding/1",
+ "family_index": 21,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Dilate2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4948,
+ "real_time": 1.4165986747922873e-01,
+ "cpu_time": 1.4165828880355658e-01,
+ "time_unit": "ms"
+ }
+ ]
+}
diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log b/test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log
new file mode 100644
index 00000000..6a502cdc
--- /dev/null
+++ b/test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log
@@ -0,0 +1,56 @@
+2025-09-07T14:26:14+00:00
+Running ./bin/image-processing-benchmark
+Run on (24 X 5100 MHz CPU s)
+CPU Caches:
+ L1 Data 48 KiB (x12)
+ L1 Instruction 32 KiB (x12)
+ L2 Unified 1280 KiB (x12)
+ L3 Unified 30720 KiB (x1)
+Load Average: 3.39, 3.64, 4.61
+***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
+--------------------------------------------------------------------------------------------
+Benchmark Time CPU Iterations
+--------------------------------------------------------------------------------------------
+Eigen_Convolve2D/1 5.11 ms 5.11 ms 121
+MLIR_Conv2D/1 7.61 ms 7.61 ms 93
+Buddy_Conv2D/1 0.328 ms 0.328 ms 2123
+Buddy_Corr2D_Constant_Padding/1 0.864 ms 0.864 ms 817
+OpenCV_Filter2D_Constant_Padding/1 1.31 ms 1.31 ms 533
+Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.149 ms 0.149 ms 4732
+Buddy_Resize2D_Bilinear_Interpolation/1 0.274 ms 0.274 ms 2557
+OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 101042
+OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47868
+Buddy_Erosion2D_Constant_Padding/1 0.285 ms 0.285 ms 2430
+Buddy_Dilation2D_Constant_Padding/1 0.315 ms 0.315 ms 2094
+Buddy_Opening2D_Constant_Padding/1 0.579 ms 0.579 ms 1014
+Buddy_Closing2D_Constant_Padding/1 0.561 ms 0.561 ms 1111
+Buddy_TopHat2D_Constant_Padding/1 0.974 ms 0.974 ms 716
+Buddy_BottomHat2D_Constant_Padding/1 0.988 ms 0.988 ms 685
+OpenCV_Erode2D_Constant_Padding/1 0.144 ms 0.144 ms 4300
+OpenCV_Opening2D_Constant_Padding/1 0.229 ms 0.229 ms 3063
+OpenCV_Closing2D_Constant_Padding/1 0.229 ms 0.229 ms 3071
+OpenCV_TopHat2D_Constant_Padding/1 0.264 ms 0.264 ms 2523
+OpenCV_BottomHat2D_Constant_Padding/1 0.264 ms 0.264 ms 2656
+OpenCV_MorphGrad2D_Constant_Padding/1 0.264 ms 0.264 ms 2657
+OpenCV_Dilate2D_Constant_Padding/1 0.142 ms 0.142 ms 4948
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json
new file mode 100644
index 00000000..7cbd4e18
--- /dev/null
+++ b/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json
@@ -0,0 +1,348 @@
+{
+ "context": {
+ "date": "2025-09-07T14:26:37+00:00",
+ "host_name": "4ed4bacfe45d",
+ "executable": "./bin/image-processing-benchmark",
+ "num_cpus": 24,
+ "mhz_per_cpu": 5100,
+ "cpu_scaling_enabled": true,
+ "caches": [
+ {
+ "type": "Data",
+ "level": 1,
+ "size": 49152,
+ "num_sharing": 2
+ },
+ {
+ "type": "Instruction",
+ "level": 1,
+ "size": 32768,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 2,
+ "size": 1310720,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 3,
+ "size": 31457280,
+ "num_sharing": 24
+ }
+ ],
+ "load_avg": [3.28125,3.60205,4.57373],
+ "library_build_type": "release"
+ },
+ "benchmarks": [
+ {
+ "name": "Eigen_Convolve2D/1",
+ "family_index": 0,
+ "per_family_instance_index": 0,
+ "run_name": "Eigen_Convolve2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 56,
+ "real_time": 1.2067083735018969e+01,
+ "cpu_time": 1.2066812107142857e+01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "MLIR_Conv2D/1",
+ "family_index": 1,
+ "per_family_instance_index": 0,
+ "run_name": "MLIR_Conv2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 23,
+ "real_time": 3.0612203414025515e+01,
+ "cpu_time": 3.0611188304347817e+01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Conv2D/1",
+ "family_index": 2,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Conv2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 568,
+ "real_time": 1.3034208284192521e+00,
+ "cpu_time": 1.3033825299295774e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Corr2D_Constant_Padding/1",
+ "family_index": 3,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Corr2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 282,
+ "real_time": 2.4699879717742297e+00,
+ "cpu_time": 2.4699094858156023e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Filter2D_Constant_Padding/1",
+ "family_index": 4,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Filter2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 162,
+ "real_time": 4.3287115562477227e+00,
+ "cpu_time": 4.3285012407407431e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "family_index": 5,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4700,
+ "real_time": 1.4935964916614777e-01,
+ "cpu_time": 1.4935194957446807e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Resize2D_Bilinear_Interpolation/1",
+ "family_index": 6,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2552,
+ "real_time": 2.7521437495292916e-01,
+ "cpu_time": 2.7520046473354215e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "family_index": 7,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 100337,
+ "real_time": 6.9741815465525284e-03,
+ "cpu_time": 6.9738547993262777e-03,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Resize2D_Bilinear_Interpolation/1",
+ "family_index": 8,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Resize2D_Bilinear_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 46773,
+ "real_time": 1.4982721659303913e-02,
+ "cpu_time": 1.4982278921600070e-02,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Erosion2D_Constant_Padding/1",
+ "family_index": 9,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Erosion2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2050,
+ "real_time": 3.1215877976359391e-01,
+ "cpu_time": 3.1211740390243931e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Dilation2D_Constant_Padding/1",
+ "family_index": 10,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Dilation2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2432,
+ "real_time": 2.9099147190879049e-01,
+ "cpu_time": 2.9097846422697360e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Opening2D_Constant_Padding/1",
+ "family_index": 11,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Opening2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1079,
+ "real_time": 5.8892201131643029e-01,
+ "cpu_time": 5.8888681742354165e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Closing2D_Constant_Padding/1",
+ "family_index": 12,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Closing2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1200,
+ "real_time": 5.7249554432928562e-01,
+ "cpu_time": 5.7247323416666751e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_TopHat2D_Constant_Padding/1",
+ "family_index": 13,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_TopHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 693,
+ "real_time": 9.6331589450739852e-01,
+ "cpu_time": 9.6324691630591597e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_BottomHat2D_Constant_Padding/1",
+ "family_index": 14,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_BottomHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 720,
+ "real_time": 9.4459417483044994e-01,
+ "cpu_time": 9.4454029166666720e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Erode2D_Constant_Padding/1",
+ "family_index": 15,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Erode2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4811,
+ "real_time": 1.4385227303672446e-01,
+ "cpu_time": 1.4383307960922870e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Opening2D_Constant_Padding/1",
+ "family_index": 16,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Opening2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 3023,
+ "real_time": 2.3205718441471690e-01,
+ "cpu_time": 2.3204768309626247e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Closing2D_Constant_Padding/1",
+ "family_index": 17,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Closing2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 3090,
+ "real_time": 2.2930755222692459e-01,
+ "cpu_time": 2.2929591650485445e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_TopHat2D_Constant_Padding/1",
+ "family_index": 18,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_TopHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2569,
+ "real_time": 2.6998296670061739e-01,
+ "cpu_time": 2.6997637757882353e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_BottomHat2D_Constant_Padding/1",
+ "family_index": 19,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_BottomHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2579,
+ "real_time": 2.6765619283639541e-01,
+ "cpu_time": 2.6764102830554409e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_MorphGrad2D_Constant_Padding/1",
+ "family_index": 20,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2657,
+ "real_time": 2.6501084954432630e-01,
+ "cpu_time": 2.6500044373353410e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Dilate2D_Constant_Padding/1",
+ "family_index": 21,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Dilate2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4921,
+ "real_time": 1.4214790953269119e-01,
+ "cpu_time": 1.4214541942694606e-01,
+ "time_unit": "ms"
+ }
+ ]
+}
diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log b/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log
new file mode 100644
index 00000000..381be4b4
--- /dev/null
+++ b/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log
@@ -0,0 +1,56 @@
+2025-09-07T14:26:37+00:00
+Running ./bin/image-processing-benchmark
+Run on (24 X 5100 MHz CPU s)
+CPU Caches:
+ L1 Data 48 KiB (x12)
+ L1 Instruction 32 KiB (x12)
+ L2 Unified 1280 KiB (x12)
+ L3 Unified 30720 KiB (x1)
+Load Average: 3.28, 3.60, 4.57
+***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
+--------------------------------------------------------------------------------------------
+Benchmark Time CPU Iterations
+--------------------------------------------------------------------------------------------
+Eigen_Convolve2D/1 12.1 ms 12.1 ms 56
+MLIR_Conv2D/1 30.6 ms 30.6 ms 23
+Buddy_Conv2D/1 1.30 ms 1.30 ms 568
+Buddy_Corr2D_Constant_Padding/1 2.47 ms 2.47 ms 282
+OpenCV_Filter2D_Constant_Padding/1 4.33 ms 4.33 ms 162
+Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.149 ms 0.149 ms 4700
+Buddy_Resize2D_Bilinear_Interpolation/1 0.275 ms 0.275 ms 2552
+OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 100337
+OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 46773
+Buddy_Erosion2D_Constant_Padding/1 0.312 ms 0.312 ms 2050
+Buddy_Dilation2D_Constant_Padding/1 0.291 ms 0.291 ms 2432
+Buddy_Opening2D_Constant_Padding/1 0.589 ms 0.589 ms 1079
+Buddy_Closing2D_Constant_Padding/1 0.572 ms 0.572 ms 1200
+Buddy_TopHat2D_Constant_Padding/1 0.963 ms 0.963 ms 693
+Buddy_BottomHat2D_Constant_Padding/1 0.945 ms 0.945 ms 720
+OpenCV_Erode2D_Constant_Padding/1 0.144 ms 0.144 ms 4811
+OpenCV_Opening2D_Constant_Padding/1 0.232 ms 0.232 ms 3023
+OpenCV_Closing2D_Constant_Padding/1 0.229 ms 0.229 ms 3090
+OpenCV_TopHat2D_Constant_Padding/1 0.270 ms 0.270 ms 2569
+OpenCV_BottomHat2D_Constant_Padding/1 0.268 ms 0.268 ms 2579
+OpenCV_MorphGrad2D_Constant_Padding/1 0.265 ms 0.265 ms 2657
+OpenCV_Dilate2D_Constant_Padding/1 0.142 ms 0.142 ms 4921
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json
new file mode 100644
index 00000000..cd4dab75
--- /dev/null
+++ b/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json
@@ -0,0 +1,348 @@
+{
+ "context": {
+ "date": "2025-09-07T14:27:01+00:00",
+ "host_name": "4ed4bacfe45d",
+ "executable": "./bin/image-processing-benchmark",
+ "num_cpus": 24,
+ "mhz_per_cpu": 5100,
+ "cpu_scaling_enabled": true,
+ "caches": [
+ {
+ "type": "Data",
+ "level": 1,
+ "size": 49152,
+ "num_sharing": 2
+ },
+ {
+ "type": "Instruction",
+ "level": 1,
+ "size": 32768,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 2,
+ "size": 1310720,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 3,
+ "size": 31457280,
+ "num_sharing": 24
+ }
+ ],
+ "load_avg": [3.31982,3.58496,4.54102],
+ "library_build_type": "release"
+ },
+ "benchmarks": [
+ {
+ "name": "Eigen_Convolve2D/1",
+ "family_index": 0,
+ "per_family_instance_index": 0,
+ "run_name": "Eigen_Convolve2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 59,
+ "real_time": 1.1948739699387955e+01,
+ "cpu_time": 1.1948392644067797e+01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "MLIR_Conv2D/1",
+ "family_index": 1,
+ "per_family_instance_index": 0,
+ "run_name": "MLIR_Conv2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 23,
+ "real_time": 3.0476028666548107e+01,
+ "cpu_time": 3.0474844999999998e+01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Conv2D/1",
+ "family_index": 2,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Conv2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 562,
+ "real_time": 1.2884445793369912e+00,
+ "cpu_time": 1.2884208736654807e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Corr2D_Constant_Padding/1",
+ "family_index": 3,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Corr2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 284,
+ "real_time": 2.4603739590711995e+00,
+ "cpu_time": 2.4602806619718312e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Filter2D_Constant_Padding/1",
+ "family_index": 4,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Filter2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 163,
+ "real_time": 4.2872666581276739e+00,
+ "cpu_time": 4.2871032392638035e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "family_index": 5,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4715,
+ "real_time": 1.4838221327876741e-01,
+ "cpu_time": 1.4837547020148453e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Resize2D_Bilinear_Interpolation/1",
+ "family_index": 6,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2576,
+ "real_time": 2.7262466462178631e-01,
+ "cpu_time": 2.7260505046583849e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "family_index": 7,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 101005,
+ "real_time": 6.9283273903298307e-03,
+ "cpu_time": 6.9277283500816817e-03,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Resize2D_Bilinear_Interpolation/1",
+ "family_index": 8,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Resize2D_Bilinear_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 47760,
+ "real_time": 1.4637780439738853e-02,
+ "cpu_time": 1.4637589635678379e-02,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Erosion2D_Constant_Padding/1",
+ "family_index": 9,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Erosion2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2247,
+ "real_time": 2.7769743156104176e-01,
+ "cpu_time": 2.7768090075656437e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Dilation2D_Constant_Padding/1",
+ "family_index": 10,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Dilation2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2603,
+ "real_time": 3.3085079827173097e-01,
+ "cpu_time": 3.3083193046484832e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Opening2D_Constant_Padding/1",
+ "family_index": 11,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Opening2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1077,
+ "real_time": 5.8244004112321579e-01,
+ "cpu_time": 5.8239145868152165e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Closing2D_Constant_Padding/1",
+ "family_index": 12,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Closing2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1171,
+ "real_time": 5.6926543123695605e-01,
+ "cpu_time": 5.6925410674637089e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_TopHat2D_Constant_Padding/1",
+ "family_index": 13,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_TopHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 737,
+ "real_time": 9.5344758927417739e-01,
+ "cpu_time": 9.5340984531885853e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_BottomHat2D_Constant_Padding/1",
+ "family_index": 14,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_BottomHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 722,
+ "real_time": 9.4657656001417256e-01,
+ "cpu_time": 9.4650936842105216e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Erode2D_Constant_Padding/1",
+ "family_index": 15,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Erode2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4886,
+ "real_time": 1.4358902494169579e-01,
+ "cpu_time": 1.4358145968072070e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Opening2D_Constant_Padding/1",
+ "family_index": 16,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Opening2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2978,
+ "real_time": 2.3539010072571068e-01,
+ "cpu_time": 2.3538351511081329e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Closing2D_Constant_Padding/1",
+ "family_index": 17,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Closing2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2979,
+ "real_time": 2.3258523218341223e-01,
+ "cpu_time": 2.3257072171869730e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_TopHat2D_Constant_Padding/1",
+ "family_index": 18,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_TopHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2603,
+ "real_time": 2.6823798372706131e-01,
+ "cpu_time": 2.6822945024971095e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_BottomHat2D_Constant_Padding/1",
+ "family_index": 19,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_BottomHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2574,
+ "real_time": 2.7231369876731898e-01,
+ "cpu_time": 2.7229450038850062e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_MorphGrad2D_Constant_Padding/1",
+ "family_index": 20,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2619,
+ "real_time": 2.6665248173072437e-01,
+ "cpu_time": 2.6664444520809394e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Dilate2D_Constant_Padding/1",
+ "family_index": 21,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Dilate2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4960,
+ "real_time": 1.4122858448254486e-01,
+ "cpu_time": 1.4122089999999995e-01,
+ "time_unit": "ms"
+ }
+ ]
+}
diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log b/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log
new file mode 100644
index 00000000..6504a841
--- /dev/null
+++ b/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log
@@ -0,0 +1,56 @@
+2025-09-07T14:27:01+00:00
+Running ./bin/image-processing-benchmark
+Run on (24 X 5100 MHz CPU s)
+CPU Caches:
+ L1 Data 48 KiB (x12)
+ L1 Instruction 32 KiB (x12)
+ L2 Unified 1280 KiB (x12)
+ L3 Unified 30720 KiB (x1)
+Load Average: 3.32, 3.58, 4.54
+***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
+--------------------------------------------------------------------------------------------
+Benchmark Time CPU Iterations
+--------------------------------------------------------------------------------------------
+Eigen_Convolve2D/1 11.9 ms 11.9 ms 59
+MLIR_Conv2D/1 30.5 ms 30.5 ms 23
+Buddy_Conv2D/1 1.29 ms 1.29 ms 562
+Buddy_Corr2D_Constant_Padding/1 2.46 ms 2.46 ms 284
+OpenCV_Filter2D_Constant_Padding/1 4.29 ms 4.29 ms 163
+Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.148 ms 0.148 ms 4715
+Buddy_Resize2D_Bilinear_Interpolation/1 0.273 ms 0.273 ms 2576
+OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 101005
+OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47760
+Buddy_Erosion2D_Constant_Padding/1 0.278 ms 0.278 ms 2247
+Buddy_Dilation2D_Constant_Padding/1 0.331 ms 0.331 ms 2603
+Buddy_Opening2D_Constant_Padding/1 0.582 ms 0.582 ms 1077
+Buddy_Closing2D_Constant_Padding/1 0.569 ms 0.569 ms 1171
+Buddy_TopHat2D_Constant_Padding/1 0.953 ms 0.953 ms 737
+Buddy_BottomHat2D_Constant_Padding/1 0.947 ms 0.947 ms 722
+OpenCV_Erode2D_Constant_Padding/1 0.144 ms 0.144 ms 4886
+OpenCV_Opening2D_Constant_Padding/1 0.235 ms 0.235 ms 2978
+OpenCV_Closing2D_Constant_Padding/1 0.233 ms 0.233 ms 2979
+OpenCV_TopHat2D_Constant_Padding/1 0.268 ms 0.268 ms 2603
+OpenCV_BottomHat2D_Constant_Padding/1 0.272 ms 0.272 ms 2574
+OpenCV_MorphGrad2D_Constant_Padding/1 0.267 ms 0.267 ms 2619
+OpenCV_Dilate2D_Constant_Padding/1 0.141 ms 0.141 ms 4960
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json
new file mode 100644
index 00000000..9cd4797c
--- /dev/null
+++ b/test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json
@@ -0,0 +1,348 @@
+{
+ "context": {
+ "date": "2025-09-07T14:27:26+00:00",
+ "host_name": "4ed4bacfe45d",
+ "executable": "./bin/image-processing-benchmark",
+ "num_cpus": 24,
+ "mhz_per_cpu": 5100,
+ "cpu_scaling_enabled": true,
+ "caches": [
+ {
+ "type": "Data",
+ "level": 1,
+ "size": 49152,
+ "num_sharing": 2
+ },
+ {
+ "type": "Instruction",
+ "level": 1,
+ "size": 32768,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 2,
+ "size": 1310720,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 3,
+ "size": 31457280,
+ "num_sharing": 24
+ }
+ ],
+ "load_avg": [3.20947,3.53711,4.49951],
+ "library_build_type": "release"
+ },
+ "benchmarks": [
+ {
+ "name": "Eigen_Convolve2D/1",
+ "family_index": 0,
+ "per_family_instance_index": 0,
+ "run_name": "Eigen_Convolve2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 31,
+ "real_time": 2.2631176176571078e+01,
+ "cpu_time": 2.2627850516129033e+01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "MLIR_Conv2D/1",
+ "family_index": 1,
+ "per_family_instance_index": 0,
+ "run_name": "MLIR_Conv2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 10,
+ "real_time": 7.0389824360609055e+01,
+ "cpu_time": 7.0386717899999994e+01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Conv2D/1",
+ "family_index": 2,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Conv2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 291,
+ "real_time": 2.3570958640157560e+00,
+ "cpu_time": 2.3570359209621992e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Corr2D_Constant_Padding/1",
+ "family_index": 3,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Corr2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 145,
+ "real_time": 4.8382232415265047e+00,
+ "cpu_time": 4.8375873103448299e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Filter2D_Constant_Padding/1",
+ "family_index": 4,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Filter2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 78,
+ "real_time": 8.9898192538664894e+00,
+ "cpu_time": 8.9885258333333269e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "family_index": 5,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4732,
+ "real_time": 1.4900763708836731e-01,
+ "cpu_time": 1.4898793300929827e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Resize2D_Bilinear_Interpolation/1",
+ "family_index": 6,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2567,
+ "real_time": 2.7684785533053519e-01,
+ "cpu_time": 2.7677605103233366e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "family_index": 7,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 100753,
+ "real_time": 7.1818906740909278e-03,
+ "cpu_time": 7.1817220132403070e-03,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Resize2D_Bilinear_Interpolation/1",
+ "family_index": 8,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Resize2D_Bilinear_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 47612,
+ "real_time": 1.4661686260374937e-02,
+ "cpu_time": 1.4661306267327555e-02,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Erosion2D_Constant_Padding/1",
+ "family_index": 9,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Erosion2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2204,
+ "real_time": 3.2491934825471869e-01,
+ "cpu_time": 3.2490066288566188e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Dilation2D_Constant_Padding/1",
+ "family_index": 10,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Dilation2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2109,
+ "real_time": 3.3516474085828601e-01,
+ "cpu_time": 3.3511585395922289e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Opening2D_Constant_Padding/1",
+ "family_index": 11,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Opening2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1000,
+ "real_time": 6.0891722515225410e-01,
+ "cpu_time": 6.0877587399999911e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Closing2D_Constant_Padding/1",
+ "family_index": 12,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Closing2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1056,
+ "real_time": 5.6265958471957478e-01,
+ "cpu_time": 5.6263823579545469e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_TopHat2D_Constant_Padding/1",
+ "family_index": 13,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_TopHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 665,
+ "real_time": 1.0144153091692387e+00,
+ "cpu_time": 1.0142662390977437e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_BottomHat2D_Constant_Padding/1",
+ "family_index": 14,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_BottomHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 687,
+ "real_time": 9.8415326972368677e-01,
+ "cpu_time": 9.8413417176128126e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Erode2D_Constant_Padding/1",
+ "family_index": 15,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Erode2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4827,
+ "real_time": 1.4460108045936990e-01,
+ "cpu_time": 1.4459874000414299e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Opening2D_Constant_Padding/1",
+ "family_index": 16,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Opening2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 3090,
+ "real_time": 2.2893413467314636e-01,
+ "cpu_time": 2.2890716181229737e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Closing2D_Constant_Padding/1",
+ "family_index": 17,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Closing2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 3038,
+ "real_time": 2.3062362374248593e-01,
+ "cpu_time": 2.3059413199473325e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_TopHat2D_Constant_Padding/1",
+ "family_index": 18,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_TopHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2627,
+ "real_time": 2.6741435983174650e-01,
+ "cpu_time": 2.6737748953178492e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_BottomHat2D_Constant_Padding/1",
+ "family_index": 19,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_BottomHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2614,
+ "real_time": 2.6675470190460482e-01,
+ "cpu_time": 2.6671898355011386e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_MorphGrad2D_Constant_Padding/1",
+ "family_index": 20,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2680,
+ "real_time": 2.6224578188648867e-01,
+ "cpu_time": 2.6221102089552206e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Dilate2D_Constant_Padding/1",
+ "family_index": 21,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Dilate2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4958,
+ "real_time": 1.4103486151188407e-01,
+ "cpu_time": 1.4101694735780548e-01,
+ "time_unit": "ms"
+ }
+ ]
+}
diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log b/test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log
new file mode 100644
index 00000000..e2188404
--- /dev/null
+++ b/test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log
@@ -0,0 +1,56 @@
+2025-09-07T14:27:26+00:00
+Running ./bin/image-processing-benchmark
+Run on (24 X 5100 MHz CPU s)
+CPU Caches:
+ L1 Data 48 KiB (x12)
+ L1 Instruction 32 KiB (x12)
+ L2 Unified 1280 KiB (x12)
+ L3 Unified 30720 KiB (x1)
+Load Average: 3.21, 3.54, 4.50
+***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
+--------------------------------------------------------------------------------------------
+Benchmark Time CPU Iterations
+--------------------------------------------------------------------------------------------
+Eigen_Convolve2D/1 22.6 ms 22.6 ms 31
+MLIR_Conv2D/1 70.4 ms 70.4 ms 10
+Buddy_Conv2D/1 2.36 ms 2.36 ms 291
+Buddy_Corr2D_Constant_Padding/1 4.84 ms 4.84 ms 145
+OpenCV_Filter2D_Constant_Padding/1 8.99 ms 8.99 ms 78
+Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.149 ms 0.149 ms 4732
+Buddy_Resize2D_Bilinear_Interpolation/1 0.277 ms 0.277 ms 2567
+OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 100753
+OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47612
+Buddy_Erosion2D_Constant_Padding/1 0.325 ms 0.325 ms 2204
+Buddy_Dilation2D_Constant_Padding/1 0.335 ms 0.335 ms 2109
+Buddy_Opening2D_Constant_Padding/1 0.609 ms 0.609 ms 1000
+Buddy_Closing2D_Constant_Padding/1 0.563 ms 0.563 ms 1056
+Buddy_TopHat2D_Constant_Padding/1 1.01 ms 1.01 ms 665
+Buddy_BottomHat2D_Constant_Padding/1 0.984 ms 0.984 ms 687
+OpenCV_Erode2D_Constant_Padding/1 0.145 ms 0.145 ms 4827
+OpenCV_Opening2D_Constant_Padding/1 0.229 ms 0.229 ms 3090
+OpenCV_Closing2D_Constant_Padding/1 0.231 ms 0.231 ms 3038
+OpenCV_TopHat2D_Constant_Padding/1 0.267 ms 0.267 ms 2627
+OpenCV_BottomHat2D_Constant_Padding/1 0.267 ms 0.267 ms 2614
+OpenCV_MorphGrad2D_Constant_Padding/1 0.262 ms 0.262 ms 2680
+OpenCV_Dilate2D_Constant_Padding/1 0.141 ms 0.141 ms 4958
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json
new file mode 100644
index 00000000..1dbbecce
--- /dev/null
+++ b/test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json
@@ -0,0 +1,348 @@
+{
+ "context": {
+ "date": "2025-09-07T14:27:51+00:00",
+ "host_name": "4ed4bacfe45d",
+ "executable": "./bin/image-processing-benchmark",
+ "num_cpus": 24,
+ "mhz_per_cpu": 5100,
+ "cpu_scaling_enabled": true,
+ "caches": [
+ {
+ "type": "Data",
+ "level": 1,
+ "size": 49152,
+ "num_sharing": 2
+ },
+ {
+ "type": "Instruction",
+ "level": 1,
+ "size": 32768,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 2,
+ "size": 1310720,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 3,
+ "size": 31457280,
+ "num_sharing": 24
+ }
+ ],
+ "load_avg": [3.13721,3.49268,4.45801],
+ "library_build_type": "release"
+ },
+ "benchmarks": [
+ {
+ "name": "Eigen_Convolve2D/1",
+ "family_index": 0,
+ "per_family_instance_index": 0,
+ "run_name": "Eigen_Convolve2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 31,
+ "real_time": 2.2485823640900275e+01,
+ "cpu_time": 2.2485474354838711e+01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "MLIR_Conv2D/1",
+ "family_index": 1,
+ "per_family_instance_index": 0,
+ "run_name": "MLIR_Conv2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 10,
+ "real_time": 7.0325020700693130e+01,
+ "cpu_time": 7.0323628100000008e+01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Conv2D/1",
+ "family_index": 2,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Conv2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 304,
+ "real_time": 2.3227319930140911e+00,
+ "cpu_time": 2.3226553092105280e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Corr2D_Constant_Padding/1",
+ "family_index": 3,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Corr2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 144,
+ "real_time": 4.8592611629929809e+00,
+ "cpu_time": 4.8591047083333336e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Filter2D_Constant_Padding/1",
+ "family_index": 4,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Filter2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 78,
+ "real_time": 8.9812785004958133e+00,
+ "cpu_time": 8.9809005512820512e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "family_index": 5,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4740,
+ "real_time": 1.4794336765501570e-01,
+ "cpu_time": 1.4793731962025314e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Resize2D_Bilinear_Interpolation/1",
+ "family_index": 6,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2579,
+ "real_time": 2.7309454177275516e-01,
+ "cpu_time": 2.7308133927879030e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "family_index": 7,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 101192,
+ "real_time": 6.9005510118396131e-03,
+ "cpu_time": 6.9002890544707105e-03,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Resize2D_Bilinear_Interpolation/1",
+ "family_index": 8,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Resize2D_Bilinear_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 47570,
+ "real_time": 1.4686870232258922e-02,
+ "cpu_time": 1.4686363212108497e-02,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Erosion2D_Constant_Padding/1",
+ "family_index": 9,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Erosion2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2503,
+ "real_time": 3.0775897315012374e-01,
+ "cpu_time": 3.0774776468238130e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Dilation2D_Constant_Padding/1",
+ "family_index": 10,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Dilation2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2313,
+ "real_time": 2.9183508988535872e-01,
+ "cpu_time": 2.9181160397751793e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Opening2D_Constant_Padding/1",
+ "family_index": 11,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Opening2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1118,
+ "real_time": 5.0250999962707754e-01,
+ "cpu_time": 5.0250005456171720e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Closing2D_Constant_Padding/1",
+ "family_index": 12,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Closing2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1097,
+ "real_time": 4.7140384098020810e-01,
+ "cpu_time": 4.7139598450319076e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_TopHat2D_Constant_Padding/1",
+ "family_index": 13,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_TopHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 739,
+ "real_time": 9.4388246395107211e-01,
+ "cpu_time": 9.4387578755074519e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_BottomHat2D_Constant_Padding/1",
+ "family_index": 14,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_BottomHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 700,
+ "real_time": 9.3932237476110458e-01,
+ "cpu_time": 9.3930383428571462e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Erode2D_Constant_Padding/1",
+ "family_index": 15,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Erode2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4835,
+ "real_time": 1.4465527980749215e-01,
+ "cpu_time": 1.4465337456049635e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Opening2D_Constant_Padding/1",
+ "family_index": 16,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Opening2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 3096,
+ "real_time": 2.3360328763261323e-01,
+ "cpu_time": 2.3359945413436684e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Closing2D_Constant_Padding/1",
+ "family_index": 17,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Closing2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 3103,
+ "real_time": 2.2638490458976213e-01,
+ "cpu_time": 2.2637974830808869e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_TopHat2D_Constant_Padding/1",
+ "family_index": 18,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_TopHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2686,
+ "real_time": 2.6290406343303147e-01,
+ "cpu_time": 2.6289967795979169e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_BottomHat2D_Constant_Padding/1",
+ "family_index": 19,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_BottomHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2642,
+ "real_time": 2.6402394037008464e-01,
+ "cpu_time": 2.6402097728993146e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_MorphGrad2D_Constant_Padding/1",
+ "family_index": 20,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2681,
+ "real_time": 2.6178075412634993e-01,
+ "cpu_time": 2.6177909585975329e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Dilate2D_Constant_Padding/1",
+ "family_index": 21,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Dilate2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4948,
+ "real_time": 1.4226491018684043e-01,
+ "cpu_time": 1.4226395371867465e-01,
+ "time_unit": "ms"
+ }
+ ]
+}
diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log b/test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log
new file mode 100644
index 00000000..50b485e5
--- /dev/null
+++ b/test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log
@@ -0,0 +1,56 @@
+2025-09-07T14:27:51+00:00
+Running ./bin/image-processing-benchmark
+Run on (24 X 5100 MHz CPU s)
+CPU Caches:
+ L1 Data 48 KiB (x12)
+ L1 Instruction 32 KiB (x12)
+ L2 Unified 1280 KiB (x12)
+ L3 Unified 30720 KiB (x1)
+Load Average: 3.14, 3.49, 4.46
+***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
+--------------------------------------------------------------------------------------------
+Benchmark Time CPU Iterations
+--------------------------------------------------------------------------------------------
+Eigen_Convolve2D/1 22.5 ms 22.5 ms 31
+MLIR_Conv2D/1 70.3 ms 70.3 ms 10
+Buddy_Conv2D/1 2.32 ms 2.32 ms 304
+Buddy_Corr2D_Constant_Padding/1 4.86 ms 4.86 ms 144
+OpenCV_Filter2D_Constant_Padding/1 8.98 ms 8.98 ms 78
+Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.148 ms 0.148 ms 4740
+Buddy_Resize2D_Bilinear_Interpolation/1 0.273 ms 0.273 ms 2579
+OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 101192
+OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47570
+Buddy_Erosion2D_Constant_Padding/1 0.308 ms 0.308 ms 2503
+Buddy_Dilation2D_Constant_Padding/1 0.292 ms 0.292 ms 2313
+Buddy_Opening2D_Constant_Padding/1 0.503 ms 0.503 ms 1118
+Buddy_Closing2D_Constant_Padding/1 0.471 ms 0.471 ms 1097
+Buddy_TopHat2D_Constant_Padding/1 0.944 ms 0.944 ms 739
+Buddy_BottomHat2D_Constant_Padding/1 0.939 ms 0.939 ms 700
+OpenCV_Erode2D_Constant_Padding/1 0.145 ms 0.145 ms 4835
+OpenCV_Opening2D_Constant_Padding/1 0.234 ms 0.234 ms 3096
+OpenCV_Closing2D_Constant_Padding/1 0.226 ms 0.226 ms 3103
+OpenCV_TopHat2D_Constant_Padding/1 0.263 ms 0.263 ms 2686
+OpenCV_BottomHat2D_Constant_Padding/1 0.264 ms 0.264 ms 2642
+OpenCV_MorphGrad2D_Constant_Padding/1 0.262 ms 0.262 ms 2681
+OpenCV_Dilate2D_Constant_Padding/1 0.142 ms 0.142 ms 4948
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json
new file mode 100644
index 00000000..7142618a
--- /dev/null
+++ b/test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json
@@ -0,0 +1,348 @@
+{
+ "context": {
+ "date": "2025-09-07T14:28:14+00:00",
+ "host_name": "4ed4bacfe45d",
+ "executable": "./bin/image-processing-benchmark",
+ "num_cpus": 24,
+ "mhz_per_cpu": 5100,
+ "cpu_scaling_enabled": true,
+ "caches": [
+ {
+ "type": "Data",
+ "level": 1,
+ "size": 49152,
+ "num_sharing": 2
+ },
+ {
+ "type": "Instruction",
+ "level": 1,
+ "size": 32768,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 2,
+ "size": 1310720,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 3,
+ "size": 31457280,
+ "num_sharing": 24
+ }
+ ],
+ "load_avg": [3.08936,3.45215,4.41846],
+ "library_build_type": "release"
+ },
+ "benchmarks": [
+ {
+ "name": "Eigen_Convolve2D/1",
+ "family_index": 0,
+ "per_family_instance_index": 0,
+ "run_name": "Eigen_Convolve2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 19,
+ "real_time": 3.6411155799501820e+01,
+ "cpu_time": 3.6410112789473686e+01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "MLIR_Conv2D/1",
+ "family_index": 1,
+ "per_family_instance_index": 0,
+ "run_name": "MLIR_Conv2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 6,
+ "real_time": 1.2272199243307114e+02,
+ "cpu_time": 1.2271996516666665e+02,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Conv2D/1",
+ "family_index": 2,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Conv2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 164,
+ "real_time": 4.2625103463850369e+00,
+ "cpu_time": 4.2624786829268295e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Corr2D_Constant_Padding/1",
+ "family_index": 3,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Corr2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 87,
+ "real_time": 8.0896045627265138e+00,
+ "cpu_time": 8.0895408160919526e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Filter2D_Constant_Padding/1",
+ "family_index": 4,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Filter2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 116,
+ "real_time": 6.0323840328331650e+00,
+ "cpu_time": 6.0323151551724177e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "family_index": 5,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4740,
+ "real_time": 1.4794642962358168e-01,
+ "cpu_time": 1.4794533839662441e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Resize2D_Bilinear_Interpolation/1",
+ "family_index": 6,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2578,
+ "real_time": 2.7431065723866432e-01,
+ "cpu_time": 2.7430675096974405e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "family_index": 7,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 101183,
+ "real_time": 6.9150012404555119e-03,
+ "cpu_time": 6.9149498927685484e-03,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Resize2D_Bilinear_Interpolation/1",
+ "family_index": 8,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Resize2D_Bilinear_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 47997,
+ "real_time": 1.4597131495282340e-02,
+ "cpu_time": 1.4596917932370775e-02,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Erosion2D_Constant_Padding/1",
+ "family_index": 9,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Erosion2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2553,
+ "real_time": 2.8082992789608052e-01,
+ "cpu_time": 2.8082769095182175e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Dilation2D_Constant_Padding/1",
+ "family_index": 10,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Dilation2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2317,
+ "real_time": 2.6909916641180198e-01,
+ "cpu_time": 2.6909711523521840e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Opening2D_Constant_Padding/1",
+ "family_index": 11,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Opening2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1198,
+ "real_time": 4.8579790867330236e-01,
+ "cpu_time": 4.8579480467445768e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Closing2D_Constant_Padding/1",
+ "family_index": 12,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Closing2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1256,
+ "real_time": 4.3843196005008783e-01,
+ "cpu_time": 4.3842444347133841e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_TopHat2D_Constant_Padding/1",
+ "family_index": 13,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_TopHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 721,
+ "real_time": 9.3752561180998317e-01,
+ "cpu_time": 9.3750926213592223e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_BottomHat2D_Constant_Padding/1",
+ "family_index": 14,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_BottomHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 740,
+ "real_time": 9.3545040770156962e-01,
+ "cpu_time": 9.3543432297297435e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Erode2D_Constant_Padding/1",
+ "family_index": 15,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Erode2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4947,
+ "real_time": 1.4094690816171584e-01,
+ "cpu_time": 1.4094593612290271e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Opening2D_Constant_Padding/1",
+ "family_index": 16,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Opening2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 3152,
+ "real_time": 2.2193788063858971e-01,
+ "cpu_time": 2.2193630583756352e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Closing2D_Constant_Padding/1",
+ "family_index": 17,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Closing2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 3129,
+ "real_time": 2.2150588496571799e-01,
+ "cpu_time": 2.2150361585171055e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_TopHat2D_Constant_Padding/1",
+ "family_index": 18,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_TopHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2742,
+ "real_time": 2.5509349882254889e-01,
+ "cpu_time": 2.5509168016046663e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_BottomHat2D_Constant_Padding/1",
+ "family_index": 19,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_BottomHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2746,
+ "real_time": 2.5341272739407272e-01,
+ "cpu_time": 2.5341093226511219e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_MorphGrad2D_Constant_Padding/1",
+ "family_index": 20,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2689,
+ "real_time": 2.6112427472979571e-01,
+ "cpu_time": 2.6112239866121267e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Dilate2D_Constant_Padding/1",
+ "family_index": 21,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Dilate2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4988,
+ "real_time": 1.4059869355933610e-01,
+ "cpu_time": 1.4059724599037698e-01,
+ "time_unit": "ms"
+ }
+ ]
+}
diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log b/test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log
new file mode 100644
index 00000000..268cf92d
--- /dev/null
+++ b/test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log
@@ -0,0 +1,56 @@
+2025-09-07T14:28:14+00:00
+Running ./bin/image-processing-benchmark
+Run on (24 X 5100 MHz CPU s)
+CPU Caches:
+ L1 Data 48 KiB (x12)
+ L1 Instruction 32 KiB (x12)
+ L2 Unified 1280 KiB (x12)
+ L3 Unified 30720 KiB (x1)
+Load Average: 3.09, 3.45, 4.42
+***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
+--------------------------------------------------------------------------------------------
+Benchmark Time CPU Iterations
+--------------------------------------------------------------------------------------------
+Eigen_Convolve2D/1 36.4 ms 36.4 ms 19
+MLIR_Conv2D/1 123 ms 123 ms 6
+Buddy_Conv2D/1 4.26 ms 4.26 ms 164
+Buddy_Corr2D_Constant_Padding/1 8.09 ms 8.09 ms 87
+OpenCV_Filter2D_Constant_Padding/1 6.03 ms 6.03 ms 116
+Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.148 ms 0.148 ms 4740
+Buddy_Resize2D_Bilinear_Interpolation/1 0.274 ms 0.274 ms 2578
+OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 101183
+OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47997
+Buddy_Erosion2D_Constant_Padding/1 0.281 ms 0.281 ms 2553
+Buddy_Dilation2D_Constant_Padding/1 0.269 ms 0.269 ms 2317
+Buddy_Opening2D_Constant_Padding/1 0.486 ms 0.486 ms 1198
+Buddy_Closing2D_Constant_Padding/1 0.438 ms 0.438 ms 1256
+Buddy_TopHat2D_Constant_Padding/1 0.938 ms 0.938 ms 721
+Buddy_BottomHat2D_Constant_Padding/1 0.935 ms 0.935 ms 740
+OpenCV_Erode2D_Constant_Padding/1 0.141 ms 0.141 ms 4947
+OpenCV_Opening2D_Constant_Padding/1 0.222 ms 0.222 ms 3152
+OpenCV_Closing2D_Constant_Padding/1 0.222 ms 0.222 ms 3129
+OpenCV_TopHat2D_Constant_Padding/1 0.255 ms 0.255 ms 2742
+OpenCV_BottomHat2D_Constant_Padding/1 0.253 ms 0.253 ms 2746
+OpenCV_MorphGrad2D_Constant_Padding/1 0.261 ms 0.261 ms 2689
+OpenCV_Dilate2D_Constant_Padding/1 0.141 ms 0.141 ms 4988
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json
new file mode 100644
index 00000000..d7045e77
--- /dev/null
+++ b/test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json
@@ -0,0 +1,348 @@
+{
+ "context": {
+ "date": "2025-09-07T14:28:38+00:00",
+ "host_name": "4ed4bacfe45d",
+ "executable": "./bin/image-processing-benchmark",
+ "num_cpus": 24,
+ "mhz_per_cpu": 5100,
+ "cpu_scaling_enabled": true,
+ "caches": [
+ {
+ "type": "Data",
+ "level": 1,
+ "size": 49152,
+ "num_sharing": 2
+ },
+ {
+ "type": "Instruction",
+ "level": 1,
+ "size": 32768,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 2,
+ "size": 1310720,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 3,
+ "size": 31457280,
+ "num_sharing": 24
+ }
+ ],
+ "load_avg": [3.06299,3.42188,4.38721],
+ "library_build_type": "release"
+ },
+ "benchmarks": [
+ {
+ "name": "Eigen_Convolve2D/1",
+ "family_index": 0,
+ "per_family_instance_index": 0,
+ "run_name": "Eigen_Convolve2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 19,
+ "real_time": 3.6139344306368578e+01,
+ "cpu_time": 3.6132733684210528e+01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "MLIR_Conv2D/1",
+ "family_index": 1,
+ "per_family_instance_index": 0,
+ "run_name": "MLIR_Conv2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 6,
+ "real_time": 1.2281656203170617e+02,
+ "cpu_time": 1.2280101033333335e+02,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Conv2D/1",
+ "family_index": 2,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Conv2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 174,
+ "real_time": 4.0128050136497651e+00,
+ "cpu_time": 4.0122547816091965e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Corr2D_Constant_Padding/1",
+ "family_index": 3,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Corr2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 86,
+ "real_time": 8.0886327683232544e+00,
+ "cpu_time": 8.0876173023255777e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Filter2D_Constant_Padding/1",
+ "family_index": 4,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Filter2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 116,
+ "real_time": 6.0190088276205396e+00,
+ "cpu_time": 6.0181940862068943e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "family_index": 5,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4751,
+ "real_time": 1.4789280477912017e-01,
+ "cpu_time": 1.4787357756261846e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Resize2D_Bilinear_Interpolation/1",
+ "family_index": 6,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2581,
+ "real_time": 2.7365781617968271e-01,
+ "cpu_time": 2.7361916388996527e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "family_index": 7,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 101075,
+ "real_time": 6.9230752152851715e-03,
+ "cpu_time": 6.9229836655948533e-03,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Resize2D_Bilinear_Interpolation/1",
+ "family_index": 8,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Resize2D_Bilinear_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 47881,
+ "real_time": 1.4611373583860823e-02,
+ "cpu_time": 1.4611187402101052e-02,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Erosion2D_Constant_Padding/1",
+ "family_index": 9,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Erosion2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2510,
+ "real_time": 3.0442776225240109e-01,
+ "cpu_time": 3.0439000358565693e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Dilation2D_Constant_Padding/1",
+ "family_index": 10,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Dilation2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2091,
+ "real_time": 2.6490774481334167e-01,
+ "cpu_time": 2.6486507604017184e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Opening2D_Constant_Padding/1",
+ "family_index": 11,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Opening2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1262,
+ "real_time": 4.7815910196625488e-01,
+ "cpu_time": 4.7810812282091797e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Closing2D_Constant_Padding/1",
+ "family_index": 12,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Closing2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1152,
+ "real_time": 4.8954437241061693e-01,
+ "cpu_time": 4.8945949305555547e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_TopHat2D_Constant_Padding/1",
+ "family_index": 13,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_TopHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 713,
+ "real_time": 9.2543011627892791e-01,
+ "cpu_time": 9.2528858064516162e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_BottomHat2D_Constant_Padding/1",
+ "family_index": 14,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_BottomHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 733,
+ "real_time": 9.2815726710862057e-01,
+ "cpu_time": 9.2803548840382022e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Erode2D_Constant_Padding/1",
+ "family_index": 15,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Erode2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4956,
+ "real_time": 1.4103952227002484e-01,
+ "cpu_time": 1.4102089689265529e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Opening2D_Constant_Padding/1",
+ "family_index": 16,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Opening2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 3097,
+ "real_time": 2.2641591508811160e-01,
+ "cpu_time": 2.2638668517920651e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Closing2D_Constant_Padding/1",
+ "family_index": 17,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Closing2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 3063,
+ "real_time": 2.2829655792601858e-01,
+ "cpu_time": 2.2829427815866776e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_TopHat2D_Constant_Padding/1",
+ "family_index": 18,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_TopHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2628,
+ "real_time": 2.6419852495964624e-01,
+ "cpu_time": 2.6419667199391139e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_BottomHat2D_Constant_Padding/1",
+ "family_index": 19,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_BottomHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2623,
+ "real_time": 2.6604910979414548e-01,
+ "cpu_time": 2.6604722645825318e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_MorphGrad2D_Constant_Padding/1",
+ "family_index": 20,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2690,
+ "real_time": 2.6090611395561120e-01,
+ "cpu_time": 2.6090426319702570e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Dilate2D_Constant_Padding/1",
+ "family_index": 21,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Dilate2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4952,
+ "real_time": 1.4145706474829181e-01,
+ "cpu_time": 1.4145561409531551e-01,
+ "time_unit": "ms"
+ }
+ ]
+}
diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log b/test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log
new file mode 100644
index 00000000..34c65b95
--- /dev/null
+++ b/test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log
@@ -0,0 +1,56 @@
+2025-09-07T14:28:38+00:00
+Running ./bin/image-processing-benchmark
+Run on (24 X 5100 MHz CPU s)
+CPU Caches:
+ L1 Data 48 KiB (x12)
+ L1 Instruction 32 KiB (x12)
+ L2 Unified 1280 KiB (x12)
+ L3 Unified 30720 KiB (x1)
+Load Average: 3.06, 3.42, 4.39
+***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
+--------------------------------------------------------------------------------------------
+Benchmark Time CPU Iterations
+--------------------------------------------------------------------------------------------
+Eigen_Convolve2D/1 36.1 ms 36.1 ms 19
+MLIR_Conv2D/1 123 ms 123 ms 6
+Buddy_Conv2D/1 4.01 ms 4.01 ms 174
+Buddy_Corr2D_Constant_Padding/1 8.09 ms 8.09 ms 86
+OpenCV_Filter2D_Constant_Padding/1 6.02 ms 6.02 ms 116
+Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.148 ms 0.148 ms 4751
+Buddy_Resize2D_Bilinear_Interpolation/1 0.274 ms 0.274 ms 2581
+OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 101075
+OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47881
+Buddy_Erosion2D_Constant_Padding/1 0.304 ms 0.304 ms 2510
+Buddy_Dilation2D_Constant_Padding/1 0.265 ms 0.265 ms 2091
+Buddy_Opening2D_Constant_Padding/1 0.478 ms 0.478 ms 1262
+Buddy_Closing2D_Constant_Padding/1 0.490 ms 0.489 ms 1152
+Buddy_TopHat2D_Constant_Padding/1 0.925 ms 0.925 ms 713
+Buddy_BottomHat2D_Constant_Padding/1 0.928 ms 0.928 ms 733
+OpenCV_Erode2D_Constant_Padding/1 0.141 ms 0.141 ms 4956
+OpenCV_Opening2D_Constant_Padding/1 0.226 ms 0.226 ms 3097
+OpenCV_Closing2D_Constant_Padding/1 0.228 ms 0.228 ms 3063
+OpenCV_TopHat2D_Constant_Padding/1 0.264 ms 0.264 ms 2628
+OpenCV_BottomHat2D_Constant_Padding/1 0.266 ms 0.266 ms 2623
+OpenCV_MorphGrad2D_Constant_Padding/1 0.261 ms 0.261 ms 2690
+OpenCV_Dilate2D_Constant_Padding/1 0.141 ms 0.141 ms 4952
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
diff --git a/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json
new file mode 100644
index 00000000..538ed5ff
--- /dev/null
+++ b/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json
@@ -0,0 +1,348 @@
+{
+ "context": {
+ "date": "2025-09-07T14:23:19+00:00",
+ "host_name": "4ed4bacfe45d",
+ "executable": "./bin/image-processing-benchmark",
+ "num_cpus": 24,
+ "mhz_per_cpu": 5100,
+ "cpu_scaling_enabled": true,
+ "caches": [
+ {
+ "type": "Data",
+ "level": 1,
+ "size": 49152,
+ "num_sharing": 2
+ },
+ {
+ "type": "Instruction",
+ "level": 1,
+ "size": 32768,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 2,
+ "size": 1310720,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 3,
+ "size": 31457280,
+ "num_sharing": 24
+ }
+ ],
+ "load_avg": [3.03271,3.75732,4.84424],
+ "library_build_type": "release"
+ },
+ "benchmarks": [
+ {
+ "name": "Eigen_Convolve2D/1",
+ "family_index": 0,
+ "per_family_instance_index": 0,
+ "run_name": "Eigen_Convolve2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 136,
+ "real_time": 5.1130582030643437e+00,
+ "cpu_time": 5.1129978750000005e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "MLIR_Conv2D/1",
+ "family_index": 1,
+ "per_family_instance_index": 0,
+ "run_name": "MLIR_Conv2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 92,
+ "real_time": 7.5614458840826284e+00,
+ "cpu_time": 7.5612571630434795e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Conv2D/1",
+ "family_index": 2,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Conv2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 991,
+ "real_time": 7.3911826286979931e-01,
+ "cpu_time": 7.3911208072653889e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Corr2D_Constant_Padding/1",
+ "family_index": 3,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Corr2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 634,
+ "real_time": 1.0977113554812381e+00,
+ "cpu_time": 1.0976892066246056e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Filter2D_Constant_Padding/1",
+ "family_index": 4,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Filter2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 359,
+ "real_time": 1.9464333305617894e+00,
+ "cpu_time": 1.9463996295264627e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "family_index": 5,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4731,
+ "real_time": 1.4831822525368915e-01,
+ "cpu_time": 1.4831527478334391e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Resize2D_Bilinear_Interpolation/1",
+ "family_index": 6,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2579,
+ "real_time": 2.7571449409440857e-01,
+ "cpu_time": 2.7571217332299330e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "family_index": 7,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 101112,
+ "real_time": 6.9151851410107082e-03,
+ "cpu_time": 6.9149452488329738e-03,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Resize2D_Bilinear_Interpolation/1",
+ "family_index": 8,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Resize2D_Bilinear_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 47882,
+ "real_time": 1.4606120489572882e-02,
+ "cpu_time": 1.4605831794828930e-02,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Erosion2D_Constant_Padding/1",
+ "family_index": 9,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Erosion2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2267,
+ "real_time": 2.9327621150237643e-01,
+ "cpu_time": 2.9326395059550070e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Dilation2D_Constant_Padding/1",
+ "family_index": 10,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Dilation2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2325,
+ "real_time": 2.8085091902363685e-01,
+ "cpu_time": 2.8084884645161295e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Opening2D_Constant_Padding/1",
+ "family_index": 11,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Opening2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1277,
+ "real_time": 4.9499843850113401e-01,
+ "cpu_time": 4.9497863508222334e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Closing2D_Constant_Padding/1",
+ "family_index": 12,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Closing2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1822,
+ "real_time": 3.4349706150697695e-01,
+ "cpu_time": 3.4349251042810131e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_TopHat2D_Constant_Padding/1",
+ "family_index": 13,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_TopHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 705,
+ "real_time": 9.3753048291443086e-01,
+ "cpu_time": 9.3750746524822892e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_BottomHat2D_Constant_Padding/1",
+ "family_index": 14,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_BottomHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 717,
+ "real_time": 9.3463689836996866e-01,
+ "cpu_time": 9.3461899302649698e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Erode2D_Constant_Padding/1",
+ "family_index": 15,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Erode2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4954,
+ "real_time": 1.4094130591394249e-01,
+ "cpu_time": 1.4093779975777146e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Opening2D_Constant_Padding/1",
+ "family_index": 16,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Opening2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 3148,
+ "real_time": 2.2180359976264391e-01,
+ "cpu_time": 2.2180045520965658e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Closing2D_Constant_Padding/1",
+ "family_index": 17,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Closing2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 3189,
+ "real_time": 2.1949756099685078e-01,
+ "cpu_time": 2.1949316149263115e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_TopHat2D_Constant_Padding/1",
+ "family_index": 18,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_TopHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2736,
+ "real_time": 2.5490349926577327e-01,
+ "cpu_time": 2.5490010709064376e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_BottomHat2D_Constant_Padding/1",
+ "family_index": 19,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_BottomHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2737,
+ "real_time": 2.5427932852717877e-01,
+ "cpu_time": 2.5427474059188909e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_MorphGrad2D_Constant_Padding/1",
+ "family_index": 20,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2679,
+ "real_time": 2.6234736879533155e-01,
+ "cpu_time": 2.6234529749906660e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Dilate2D_Constant_Padding/1",
+ "family_index": 21,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Dilate2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4852,
+ "real_time": 1.4455454318069369e-01,
+ "cpu_time": 1.4455208182192936e-01,
+ "time_unit": "ms"
+ }
+ ]
+}
diff --git a/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log b/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log
new file mode 100644
index 00000000..bbdcfd5a
--- /dev/null
+++ b/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log
@@ -0,0 +1,56 @@
+2025-09-07T14:23:19+00:00
+Running ./bin/image-processing-benchmark
+Run on (24 X 5100 MHz CPU s)
+CPU Caches:
+ L1 Data 48 KiB (x12)
+ L1 Instruction 32 KiB (x12)
+ L2 Unified 1280 KiB (x12)
+ L3 Unified 30720 KiB (x1)
+Load Average: 3.03, 3.76, 4.84
+***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
+--------------------------------------------------------------------------------------------
+Benchmark Time CPU Iterations
+--------------------------------------------------------------------------------------------
+Eigen_Convolve2D/1 5.11 ms 5.11 ms 136
+MLIR_Conv2D/1 7.56 ms 7.56 ms 92
+Buddy_Conv2D/1 0.739 ms 0.739 ms 991
+Buddy_Corr2D_Constant_Padding/1 1.10 ms 1.10 ms 634
+OpenCV_Filter2D_Constant_Padding/1 1.95 ms 1.95 ms 359
+Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.148 ms 0.148 ms 4731
+Buddy_Resize2D_Bilinear_Interpolation/1 0.276 ms 0.276 ms 2579
+OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 101112
+OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47882
+Buddy_Erosion2D_Constant_Padding/1 0.293 ms 0.293 ms 2267
+Buddy_Dilation2D_Constant_Padding/1 0.281 ms 0.281 ms 2325
+Buddy_Opening2D_Constant_Padding/1 0.495 ms 0.495 ms 1277
+Buddy_Closing2D_Constant_Padding/1 0.343 ms 0.343 ms 1822
+Buddy_TopHat2D_Constant_Padding/1 0.938 ms 0.938 ms 705
+Buddy_BottomHat2D_Constant_Padding/1 0.935 ms 0.935 ms 717
+OpenCV_Erode2D_Constant_Padding/1 0.141 ms 0.141 ms 4954
+OpenCV_Opening2D_Constant_Padding/1 0.222 ms 0.222 ms 3148
+OpenCV_Closing2D_Constant_Padding/1 0.219 ms 0.219 ms 3189
+OpenCV_TopHat2D_Constant_Padding/1 0.255 ms 0.255 ms 2736
+OpenCV_BottomHat2D_Constant_Padding/1 0.254 ms 0.254 ms 2737
+OpenCV_MorphGrad2D_Constant_Padding/1 0.262 ms 0.262 ms 2679
+OpenCV_Dilate2D_Constant_Padding/1 0.145 ms 0.145 ms 4852
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
diff --git a/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json
new file mode 100644
index 00000000..4622ce88
--- /dev/null
+++ b/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json
@@ -0,0 +1,348 @@
+{
+ "context": {
+ "date": "2025-09-07T14:23:42+00:00",
+ "host_name": "4ed4bacfe45d",
+ "executable": "./bin/image-processing-benchmark",
+ "num_cpus": 24,
+ "mhz_per_cpu": 5100,
+ "cpu_scaling_enabled": true,
+ "caches": [
+ {
+ "type": "Data",
+ "level": 1,
+ "size": 49152,
+ "num_sharing": 2
+ },
+ {
+ "type": "Instruction",
+ "level": 1,
+ "size": 32768,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 2,
+ "size": 1310720,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 3,
+ "size": 31457280,
+ "num_sharing": 24
+ }
+ ],
+ "load_avg": [3.10303,3.72461,4.80908],
+ "library_build_type": "release"
+ },
+ "benchmarks": [
+ {
+ "name": "Eigen_Convolve2D/1",
+ "family_index": 0,
+ "per_family_instance_index": 0,
+ "run_name": "Eigen_Convolve2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 140,
+ "real_time": 4.9881702821169585e+00,
+ "cpu_time": 4.9880098642857140e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "MLIR_Conv2D/1",
+ "family_index": 1,
+ "per_family_instance_index": 0,
+ "run_name": "MLIR_Conv2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 92,
+ "real_time": 7.5616192315583648e+00,
+ "cpu_time": 7.5614974891304341e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Conv2D/1",
+ "family_index": 2,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Conv2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 938,
+ "real_time": 7.0375580984010877e-01,
+ "cpu_time": 7.0372593390191918e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Corr2D_Constant_Padding/1",
+ "family_index": 3,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Corr2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 634,
+ "real_time": 1.0931943234773089e+00,
+ "cpu_time": 1.0931488375394325e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Filter2D_Constant_Padding/1",
+ "family_index": 4,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Filter2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 360,
+ "real_time": 1.9437930236260097e+00,
+ "cpu_time": 1.9436920166666676e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "family_index": 5,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4731,
+ "real_time": 1.4828980958025845e-01,
+ "cpu_time": 1.4828513654618480e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Resize2D_Bilinear_Interpolation/1",
+ "family_index": 6,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2575,
+ "real_time": 2.7519861905320175e-01,
+ "cpu_time": 2.7518627728155343e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "family_index": 7,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 100987,
+ "real_time": 6.9202101019204187e-03,
+ "cpu_time": 6.9199823739689269e-03,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Resize2D_Bilinear_Interpolation/1",
+ "family_index": 8,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Resize2D_Bilinear_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 47878,
+ "real_time": 1.4626334239538314e-02,
+ "cpu_time": 1.4625822966707055e-02,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Erosion2D_Constant_Padding/1",
+ "family_index": 9,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Erosion2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2361,
+ "real_time": 2.8520150363369545e-01,
+ "cpu_time": 2.8518143879711966e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Dilation2D_Constant_Padding/1",
+ "family_index": 10,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Dilation2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2362,
+ "real_time": 3.0680066550262491e-01,
+ "cpu_time": 3.0677437510584199e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Opening2D_Constant_Padding/1",
+ "family_index": 11,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Opening2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1000,
+ "real_time": 6.1990195512771606e-01,
+ "cpu_time": 6.1985857099999997e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Closing2D_Constant_Padding/1",
+ "family_index": 12,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Closing2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1712,
+ "real_time": 3.8910013945581756e-01,
+ "cpu_time": 3.8908160338785042e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_TopHat2D_Constant_Padding/1",
+ "family_index": 13,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_TopHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 698,
+ "real_time": 9.5385507304210715e-01,
+ "cpu_time": 9.5382185100286465e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_BottomHat2D_Constant_Padding/1",
+ "family_index": 14,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_BottomHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 712,
+ "real_time": 9.5589765546362049e-01,
+ "cpu_time": 9.5589013202247197e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Erode2D_Constant_Padding/1",
+ "family_index": 15,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Erode2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4941,
+ "real_time": 1.4135648417849214e-01,
+ "cpu_time": 1.4134417870876342e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Opening2D_Constant_Padding/1",
+ "family_index": 16,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Opening2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 3094,
+ "real_time": 2.2559705339059108e-01,
+ "cpu_time": 2.2558599385908198e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Closing2D_Constant_Padding/1",
+ "family_index": 17,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Closing2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 3097,
+ "real_time": 2.2691802094934985e-01,
+ "cpu_time": 2.2690593283823002e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_TopHat2D_Constant_Padding/1",
+ "family_index": 18,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_TopHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2642,
+ "real_time": 2.6526975668938568e-01,
+ "cpu_time": 2.6526233232399676e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_BottomHat2D_Constant_Padding/1",
+ "family_index": 19,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_BottomHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2635,
+ "real_time": 2.6353473653150916e-01,
+ "cpu_time": 2.6351631650853868e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_MorphGrad2D_Constant_Padding/1",
+ "family_index": 20,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2666,
+ "real_time": 2.6146747557542777e-01,
+ "cpu_time": 2.6145232070517682e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Dilate2D_Constant_Padding/1",
+ "family_index": 21,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Dilate2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4900,
+ "real_time": 1.4341449479059298e-01,
+ "cpu_time": 1.4340792591836732e-01,
+ "time_unit": "ms"
+ }
+ ]
+}
diff --git a/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log b/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log
new file mode 100644
index 00000000..d5b3ef25
--- /dev/null
+++ b/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log
@@ -0,0 +1,56 @@
+2025-09-07T14:23:42+00:00
+Running ./bin/image-processing-benchmark
+Run on (24 X 5100 MHz CPU s)
+CPU Caches:
+ L1 Data 48 KiB (x12)
+ L1 Instruction 32 KiB (x12)
+ L2 Unified 1280 KiB (x12)
+ L3 Unified 30720 KiB (x1)
+Load Average: 3.10, 3.72, 4.81
+***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
+--------------------------------------------------------------------------------------------
+Benchmark Time CPU Iterations
+--------------------------------------------------------------------------------------------
+Eigen_Convolve2D/1 4.99 ms 4.99 ms 140
+MLIR_Conv2D/1 7.56 ms 7.56 ms 92
+Buddy_Conv2D/1 0.704 ms 0.704 ms 938
+Buddy_Corr2D_Constant_Padding/1 1.09 ms 1.09 ms 634
+OpenCV_Filter2D_Constant_Padding/1 1.94 ms 1.94 ms 360
+Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.148 ms 0.148 ms 4731
+Buddy_Resize2D_Bilinear_Interpolation/1 0.275 ms 0.275 ms 2575
+OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 100987
+OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47878
+Buddy_Erosion2D_Constant_Padding/1 0.285 ms 0.285 ms 2361
+Buddy_Dilation2D_Constant_Padding/1 0.307 ms 0.307 ms 2362
+Buddy_Opening2D_Constant_Padding/1 0.620 ms 0.620 ms 1000
+Buddy_Closing2D_Constant_Padding/1 0.389 ms 0.389 ms 1712
+Buddy_TopHat2D_Constant_Padding/1 0.954 ms 0.954 ms 698
+Buddy_BottomHat2D_Constant_Padding/1 0.956 ms 0.956 ms 712
+OpenCV_Erode2D_Constant_Padding/1 0.141 ms 0.141 ms 4941
+OpenCV_Opening2D_Constant_Padding/1 0.226 ms 0.226 ms 3094
+OpenCV_Closing2D_Constant_Padding/1 0.227 ms 0.227 ms 3097
+OpenCV_TopHat2D_Constant_Padding/1 0.265 ms 0.265 ms 2642
+OpenCV_BottomHat2D_Constant_Padding/1 0.264 ms 0.264 ms 2635
+OpenCV_MorphGrad2D_Constant_Padding/1 0.261 ms 0.261 ms 2666
+OpenCV_Dilate2D_Constant_Padding/1 0.143 ms 0.143 ms 4900
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
diff --git a/test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json
new file mode 100644
index 00000000..aea059fb
--- /dev/null
+++ b/test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json
@@ -0,0 +1,348 @@
+{
+ "context": {
+ "date": "2025-09-07T14:24:06+00:00",
+ "host_name": "4ed4bacfe45d",
+ "executable": "./bin/image-processing-benchmark",
+ "num_cpus": 24,
+ "mhz_per_cpu": 5100,
+ "cpu_scaling_enabled": true,
+ "caches": [
+ {
+ "type": "Data",
+ "level": 1,
+ "size": 49152,
+ "num_sharing": 2
+ },
+ {
+ "type": "Instruction",
+ "level": 1,
+ "size": 32768,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 2,
+ "size": 1310720,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 3,
+ "size": 31457280,
+ "num_sharing": 24
+ }
+ ],
+ "load_avg": [3.06689,3.66504,4.76025],
+ "library_build_type": "release"
+ },
+ "benchmarks": [
+ {
+ "name": "Eigen_Convolve2D/1",
+ "family_index": 0,
+ "per_family_instance_index": 0,
+ "run_name": "Eigen_Convolve2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 58,
+ "real_time": 1.1998714814926016e+01,
+ "cpu_time": 1.1997443879310346e+01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "MLIR_Conv2D/1",
+ "family_index": 1,
+ "per_family_instance_index": 0,
+ "run_name": "MLIR_Conv2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 23,
+ "real_time": 3.0360637961522393e+01,
+ "cpu_time": 3.0356734521739146e+01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Conv2D/1",
+ "family_index": 2,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Conv2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 322,
+ "real_time": 2.1898058237311262e+00,
+ "cpu_time": 2.1895025559006216e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Corr2D_Constant_Padding/1",
+ "family_index": 3,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Corr2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 377,
+ "real_time": 1.8404441384168773e+00,
+ "cpu_time": 1.8402098037135266e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Filter2D_Constant_Padding/1",
+ "family_index": 4,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Filter2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 248,
+ "real_time": 2.8030298680307402e+00,
+ "cpu_time": 2.8026356330645159e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "family_index": 5,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4741,
+ "real_time": 1.4848858149487670e-01,
+ "cpu_time": 1.4846917338114329e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Resize2D_Bilinear_Interpolation/1",
+ "family_index": 6,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2547,
+ "real_time": 2.7368638209609553e-01,
+ "cpu_time": 2.7364592854338443e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "family_index": 7,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 101088,
+ "real_time": 6.9170411064131193e-03,
+ "cpu_time": 6.9161112792814165e-03,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Resize2D_Bilinear_Interpolation/1",
+ "family_index": 8,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Resize2D_Bilinear_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 47818,
+ "real_time": 1.4660072789606434e-02,
+ "cpu_time": 1.4659816428959836e-02,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Erosion2D_Constant_Padding/1",
+ "family_index": 9,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Erosion2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2386,
+ "real_time": 2.8420215330499615e-01,
+ "cpu_time": 2.8416078960603486e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Dilation2D_Constant_Padding/1",
+ "family_index": 10,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Dilation2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2544,
+ "real_time": 2.8090887269555770e-01,
+ "cpu_time": 2.8086637106918216e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Opening2D_Constant_Padding/1",
+ "family_index": 11,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Opening2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1000,
+ "real_time": 6.0656908899545670e-01,
+ "cpu_time": 6.0655633599999881e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Closing2D_Constant_Padding/1",
+ "family_index": 12,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Closing2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1866,
+ "real_time": 3.7647417416079382e-01,
+ "cpu_time": 3.7641147481243248e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_TopHat2D_Constant_Padding/1",
+ "family_index": 13,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_TopHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 737,
+ "real_time": 9.6684982406235775e-01,
+ "cpu_time": 9.6672385888738199e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_BottomHat2D_Constant_Padding/1",
+ "family_index": 14,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_BottomHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 692,
+ "real_time": 9.6456302232997271e-01,
+ "cpu_time": 9.6441563439306299e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Erode2D_Constant_Padding/1",
+ "family_index": 15,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Erode2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4842,
+ "real_time": 1.4419430610637241e-01,
+ "cpu_time": 1.4419237773647245e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Opening2D_Constant_Padding/1",
+ "family_index": 16,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Opening2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 3046,
+ "real_time": 2.3022636120560447e-01,
+ "cpu_time": 2.3022371799080765e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Closing2D_Constant_Padding/1",
+ "family_index": 17,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Closing2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 3026,
+ "real_time": 2.3162924122589751e-01,
+ "cpu_time": 2.3162562723066799e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_TopHat2D_Constant_Padding/1",
+ "family_index": 18,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_TopHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2617,
+ "real_time": 2.6727178663208961e-01,
+ "cpu_time": 2.6726527283148621e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_BottomHat2D_Constant_Padding/1",
+ "family_index": 19,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_BottomHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2605,
+ "real_time": 2.6782371547080275e-01,
+ "cpu_time": 2.6782162571976953e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_MorphGrad2D_Constant_Padding/1",
+ "family_index": 20,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2647,
+ "real_time": 2.6472585017518091e-01,
+ "cpu_time": 2.6472381412920259e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Dilate2D_Constant_Padding/1",
+ "family_index": 21,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Dilate2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4880,
+ "real_time": 1.4423252938345807e-01,
+ "cpu_time": 1.4421351024590140e-01,
+ "time_unit": "ms"
+ }
+ ]
+}
diff --git a/test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log b/test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log
new file mode 100644
index 00000000..91bbb2d4
--- /dev/null
+++ b/test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log
@@ -0,0 +1,56 @@
+2025-09-07T14:24:06+00:00
+Running ./bin/image-processing-benchmark
+Run on (24 X 5100 MHz CPU s)
+CPU Caches:
+ L1 Data 48 KiB (x12)
+ L1 Instruction 32 KiB (x12)
+ L2 Unified 1280 KiB (x12)
+ L3 Unified 30720 KiB (x1)
+Load Average: 3.07, 3.67, 4.76
+***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
+--------------------------------------------------------------------------------------------
+Benchmark Time CPU Iterations
+--------------------------------------------------------------------------------------------
+Eigen_Convolve2D/1 12.0 ms 12.0 ms 58
+MLIR_Conv2D/1 30.4 ms 30.4 ms 23
+Buddy_Conv2D/1 2.19 ms 2.19 ms 322
+Buddy_Corr2D_Constant_Padding/1 1.84 ms 1.84 ms 377
+OpenCV_Filter2D_Constant_Padding/1 2.80 ms 2.80 ms 248
+Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.148 ms 0.148 ms 4741
+Buddy_Resize2D_Bilinear_Interpolation/1 0.274 ms 0.274 ms 2547
+OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 101088
+OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47818
+Buddy_Erosion2D_Constant_Padding/1 0.284 ms 0.284 ms 2386
+Buddy_Dilation2D_Constant_Padding/1 0.281 ms 0.281 ms 2544
+Buddy_Opening2D_Constant_Padding/1 0.607 ms 0.607 ms 1000
+Buddy_Closing2D_Constant_Padding/1 0.376 ms 0.376 ms 1866
+Buddy_TopHat2D_Constant_Padding/1 0.967 ms 0.967 ms 737
+Buddy_BottomHat2D_Constant_Padding/1 0.965 ms 0.964 ms 692
+OpenCV_Erode2D_Constant_Padding/1 0.144 ms 0.144 ms 4842
+OpenCV_Opening2D_Constant_Padding/1 0.230 ms 0.230 ms 3046
+OpenCV_Closing2D_Constant_Padding/1 0.232 ms 0.232 ms 3026
+OpenCV_TopHat2D_Constant_Padding/1 0.267 ms 0.267 ms 2617
+OpenCV_BottomHat2D_Constant_Padding/1 0.268 ms 0.268 ms 2605
+OpenCV_MorphGrad2D_Constant_Padding/1 0.265 ms 0.265 ms 2647
+OpenCV_Dilate2D_Constant_Padding/1 0.144 ms 0.144 ms 4880
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
diff --git a/test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json
new file mode 100644
index 00000000..24206b65
--- /dev/null
+++ b/test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json
@@ -0,0 +1,348 @@
+{
+ "context": {
+ "date": "2025-09-07T14:24:30+00:00",
+ "host_name": "4ed4bacfe45d",
+ "executable": "./bin/image-processing-benchmark",
+ "num_cpus": 24,
+ "mhz_per_cpu": 5100,
+ "cpu_scaling_enabled": true,
+ "caches": [
+ {
+ "type": "Data",
+ "level": 1,
+ "size": 49152,
+ "num_sharing": 2
+ },
+ {
+ "type": "Instruction",
+ "level": 1,
+ "size": 32768,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 2,
+ "size": 1310720,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 3,
+ "size": 31457280,
+ "num_sharing": 24
+ }
+ ],
+ "load_avg": [3.04297,3.61035,4.71191],
+ "library_build_type": "release"
+ },
+ "benchmarks": [
+ {
+ "name": "Eigen_Convolve2D/1",
+ "family_index": 0,
+ "per_family_instance_index": 0,
+ "run_name": "Eigen_Convolve2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 58,
+ "real_time": 1.2075262786499385e+01,
+ "cpu_time": 1.2073443568965518e+01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "MLIR_Conv2D/1",
+ "family_index": 1,
+ "per_family_instance_index": 0,
+ "run_name": "MLIR_Conv2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 23,
+ "real_time": 3.0510915362316631e+01,
+ "cpu_time": 3.0506920913043473e+01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Conv2D/1",
+ "family_index": 2,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Conv2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 314,
+ "real_time": 2.1884666672747608e+00,
+ "cpu_time": 2.1881547292993626e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Corr2D_Constant_Padding/1",
+ "family_index": 3,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Corr2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 381,
+ "real_time": 1.8304030354723844e+00,
+ "cpu_time": 1.8301742624671924e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Filter2D_Constant_Padding/1",
+ "family_index": 4,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Filter2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 250,
+ "real_time": 2.8018697649240494e+00,
+ "cpu_time": 2.8014706559999993e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "family_index": 5,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4721,
+ "real_time": 1.4815152991855834e-01,
+ "cpu_time": 1.4813229760643945e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Resize2D_Bilinear_Interpolation/1",
+ "family_index": 6,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2535,
+ "real_time": 2.7588403965594499e-01,
+ "cpu_time": 2.7584648875739659e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "family_index": 7,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 101044,
+ "real_time": 6.9154953151760795e-03,
+ "cpu_time": 6.9154101480543182e-03,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Resize2D_Bilinear_Interpolation/1",
+ "family_index": 8,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Resize2D_Bilinear_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 47923,
+ "real_time": 1.4632370029027023e-02,
+ "cpu_time": 1.4632033240823825e-02,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Erosion2D_Constant_Padding/1",
+ "family_index": 9,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Erosion2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1991,
+ "real_time": 3.2995011431077570e-01,
+ "cpu_time": 3.2994335409342052e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Dilation2D_Constant_Padding/1",
+ "family_index": 10,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Dilation2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1915,
+ "real_time": 2.8319770066607403e-01,
+ "cpu_time": 2.8319159164490931e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Opening2D_Constant_Padding/1",
+ "family_index": 11,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Opening2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1138,
+ "real_time": 5.3577692049877923e-01,
+ "cpu_time": 5.3576669420035217e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Closing2D_Constant_Padding/1",
+ "family_index": 12,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Closing2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1124,
+ "real_time": 4.9952752357912233e-01,
+ "cpu_time": 4.9943513434163772e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_TopHat2D_Constant_Padding/1",
+ "family_index": 13,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_TopHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 719,
+ "real_time": 9.1689284657735648e-01,
+ "cpu_time": 9.1676078859526888e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_BottomHat2D_Constant_Padding/1",
+ "family_index": 14,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_BottomHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 695,
+ "real_time": 9.6813468731564578e-01,
+ "cpu_time": 9.6800758705035905e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Erode2D_Constant_Padding/1",
+ "family_index": 15,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Erode2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4798,
+ "real_time": 1.4492751205151355e-01,
+ "cpu_time": 1.4491701792413500e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Opening2D_Constant_Padding/1",
+ "family_index": 16,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Opening2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2989,
+ "real_time": 2.3426035498248649e-01,
+ "cpu_time": 2.3425851689528218e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Closing2D_Constant_Padding/1",
+ "family_index": 17,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Closing2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2969,
+ "real_time": 2.3408840153505603e-01,
+ "cpu_time": 2.3408375345234064e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_TopHat2D_Constant_Padding/1",
+ "family_index": 18,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_TopHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2614,
+ "real_time": 2.6729449133838329e-01,
+ "cpu_time": 2.6729081216526446e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_BottomHat2D_Constant_Padding/1",
+ "family_index": 19,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_BottomHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2630,
+ "real_time": 2.6423307454404721e-01,
+ "cpu_time": 2.6422895171102773e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_MorphGrad2D_Constant_Padding/1",
+ "family_index": 20,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2626,
+ "real_time": 2.7601832562244671e-01,
+ "cpu_time": 2.7601385605483625e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Dilate2D_Constant_Padding/1",
+ "family_index": 21,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Dilate2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4881,
+ "real_time": 1.4363530366129135e-01,
+ "cpu_time": 1.4361626715836887e-01,
+ "time_unit": "ms"
+ }
+ ]
+}
diff --git a/test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log b/test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log
new file mode 100644
index 00000000..bba83998
--- /dev/null
+++ b/test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log
@@ -0,0 +1,56 @@
+2025-09-07T14:24:30+00:00
+Running ./bin/image-processing-benchmark
+Run on (24 X 5100 MHz CPU s)
+CPU Caches:
+ L1 Data 48 KiB (x12)
+ L1 Instruction 32 KiB (x12)
+ L2 Unified 1280 KiB (x12)
+ L3 Unified 30720 KiB (x1)
+Load Average: 3.04, 3.61, 4.71
+***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
+--------------------------------------------------------------------------------------------
+Benchmark Time CPU Iterations
+--------------------------------------------------------------------------------------------
+Eigen_Convolve2D/1 12.1 ms 12.1 ms 58
+MLIR_Conv2D/1 30.5 ms 30.5 ms 23
+Buddy_Conv2D/1 2.19 ms 2.19 ms 314
+Buddy_Corr2D_Constant_Padding/1 1.83 ms 1.83 ms 381
+OpenCV_Filter2D_Constant_Padding/1 2.80 ms 2.80 ms 250
+Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.148 ms 0.148 ms 4721
+Buddy_Resize2D_Bilinear_Interpolation/1 0.276 ms 0.276 ms 2535
+OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 101044
+OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47923
+Buddy_Erosion2D_Constant_Padding/1 0.330 ms 0.330 ms 1991
+Buddy_Dilation2D_Constant_Padding/1 0.283 ms 0.283 ms 1915
+Buddy_Opening2D_Constant_Padding/1 0.536 ms 0.536 ms 1138
+Buddy_Closing2D_Constant_Padding/1 0.500 ms 0.499 ms 1124
+Buddy_TopHat2D_Constant_Padding/1 0.917 ms 0.917 ms 719
+Buddy_BottomHat2D_Constant_Padding/1 0.968 ms 0.968 ms 695
+OpenCV_Erode2D_Constant_Padding/1 0.145 ms 0.145 ms 4798
+OpenCV_Opening2D_Constant_Padding/1 0.234 ms 0.234 ms 2989
+OpenCV_Closing2D_Constant_Padding/1 0.234 ms 0.234 ms 2969
+OpenCV_TopHat2D_Constant_Padding/1 0.267 ms 0.267 ms 2614
+OpenCV_BottomHat2D_Constant_Padding/1 0.264 ms 0.264 ms 2630
+OpenCV_MorphGrad2D_Constant_Padding/1 0.276 ms 0.276 ms 2626
+OpenCV_Dilate2D_Constant_Padding/1 0.144 ms 0.144 ms 4881
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
diff --git a/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json
new file mode 100644
index 00000000..73c6c792
--- /dev/null
+++ b/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json
@@ -0,0 +1,348 @@
+{
+ "context": {
+ "date": "2025-09-07T14:19:23+00:00",
+ "host_name": "4ed4bacfe45d",
+ "executable": "./bin/image-processing-benchmark",
+ "num_cpus": 24,
+ "mhz_per_cpu": 5100,
+ "cpu_scaling_enabled": true,
+ "caches": [
+ {
+ "type": "Data",
+ "level": 1,
+ "size": 49152,
+ "num_sharing": 2
+ },
+ {
+ "type": "Instruction",
+ "level": 1,
+ "size": 32768,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 2,
+ "size": 1310720,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 3,
+ "size": 31457280,
+ "num_sharing": 24
+ }
+ ],
+ "load_avg": [4.16504,4.64014,5.37695],
+ "library_build_type": "release"
+ },
+ "benchmarks": [
+ {
+ "name": "Eigen_Convolve2D/1",
+ "family_index": 0,
+ "per_family_instance_index": 0,
+ "run_name": "Eigen_Convolve2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 139,
+ "real_time": 5.0173752385077712e+00,
+ "cpu_time": 5.0171431366906489e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "MLIR_Conv2D/1",
+ "family_index": 1,
+ "per_family_instance_index": 0,
+ "run_name": "MLIR_Conv2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 93,
+ "real_time": 7.5428272367164650e+00,
+ "cpu_time": 7.5426328494623673e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Conv2D/1",
+ "family_index": 2,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Conv2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1245,
+ "real_time": 5.5684779123608841e-01,
+ "cpu_time": 5.5682440080321294e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Corr2D_Constant_Padding/1",
+ "family_index": 3,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Corr2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 828,
+ "real_time": 8.4202265555875888e-01,
+ "cpu_time": 8.4200422584541035e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Filter2D_Constant_Padding/1",
+ "family_index": 4,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Filter2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 531,
+ "real_time": 1.3090186522617628e+00,
+ "cpu_time": 1.3089821713747642e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "family_index": 5,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4709,
+ "real_time": 1.4896581911007850e-01,
+ "cpu_time": 1.4896079018899980e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Resize2D_Bilinear_Interpolation/1",
+ "family_index": 6,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2577,
+ "real_time": 2.7260999168172845e-01,
+ "cpu_time": 2.7260413853317822e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "family_index": 7,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 100412,
+ "real_time": 6.9631285576930929e-03,
+ "cpu_time": 6.9626375632394480e-03,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Resize2D_Bilinear_Interpolation/1",
+ "family_index": 8,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Resize2D_Bilinear_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 47961,
+ "real_time": 1.4606321100466758e-02,
+ "cpu_time": 1.4605708992723267e-02,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Erosion2D_Constant_Padding/1",
+ "family_index": 9,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Erosion2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2105,
+ "real_time": 3.0771779369854868e-01,
+ "cpu_time": 3.0768270926365782e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Dilation2D_Constant_Padding/1",
+ "family_index": 10,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Dilation2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2485,
+ "real_time": 3.0552347601059698e-01,
+ "cpu_time": 3.0551694647887367e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Opening2D_Constant_Padding/1",
+ "family_index": 11,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Opening2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1103,
+ "real_time": 5.5804573411522185e-01,
+ "cpu_time": 5.5798702266545741e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Closing2D_Constant_Padding/1",
+ "family_index": 12,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Closing2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1192,
+ "real_time": 5.6027826984356710e-01,
+ "cpu_time": 5.6026445553691262e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_TopHat2D_Constant_Padding/1",
+ "family_index": 13,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_TopHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 698,
+ "real_time": 9.4223976199299009e-01,
+ "cpu_time": 9.4221851719197713e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_BottomHat2D_Constant_Padding/1",
+ "family_index": 14,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_BottomHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 710,
+ "real_time": 9.8398026656097093e-01,
+ "cpu_time": 9.8392653380281692e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Erode2D_Constant_Padding/1",
+ "family_index": 15,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Erode2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4902,
+ "real_time": 1.4306782463262246e-01,
+ "cpu_time": 1.4306102203182383e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Opening2D_Constant_Padding/1",
+ "family_index": 16,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Opening2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 3075,
+ "real_time": 2.2767367886333931e-01,
+ "cpu_time": 2.2766505788617861e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Closing2D_Constant_Padding/1",
+ "family_index": 17,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Closing2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 3093,
+ "real_time": 2.2700202289692917e-01,
+ "cpu_time": 2.2700032945360529e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_TopHat2D_Constant_Padding/1",
+ "family_index": 18,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_TopHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2641,
+ "real_time": 2.6595405740568195e-01,
+ "cpu_time": 2.6595034721696315e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_BottomHat2D_Constant_Padding/1",
+ "family_index": 19,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_BottomHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2644,
+ "real_time": 2.6620356380127924e-01,
+ "cpu_time": 2.6620060287443309e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_MorphGrad2D_Constant_Padding/1",
+ "family_index": 20,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2674,
+ "real_time": 2.6205028271443287e-01,
+ "cpu_time": 2.6204837621540705e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Dilate2D_Constant_Padding/1",
+ "family_index": 21,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Dilate2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4913,
+ "real_time": 1.4280434565172404e-01,
+ "cpu_time": 1.4280282454712012e-01,
+ "time_unit": "ms"
+ }
+ ]
+}
diff --git a/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log b/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log
new file mode 100644
index 00000000..a978a72e
--- /dev/null
+++ b/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log
@@ -0,0 +1,56 @@
+2025-09-07T14:19:23+00:00
+Running ./bin/image-processing-benchmark
+Run on (24 X 5100 MHz CPU s)
+CPU Caches:
+ L1 Data 48 KiB (x12)
+ L1 Instruction 32 KiB (x12)
+ L2 Unified 1280 KiB (x12)
+ L3 Unified 30720 KiB (x1)
+Load Average: 4.17, 4.64, 5.38
+***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
+--------------------------------------------------------------------------------------------
+Benchmark Time CPU Iterations
+--------------------------------------------------------------------------------------------
+Eigen_Convolve2D/1 5.02 ms 5.02 ms 139
+MLIR_Conv2D/1 7.54 ms 7.54 ms 93
+Buddy_Conv2D/1 0.557 ms 0.557 ms 1245
+Buddy_Corr2D_Constant_Padding/1 0.842 ms 0.842 ms 828
+OpenCV_Filter2D_Constant_Padding/1 1.31 ms 1.31 ms 531
+Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.149 ms 0.149 ms 4709
+Buddy_Resize2D_Bilinear_Interpolation/1 0.273 ms 0.273 ms 2577
+OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 100412
+OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47961
+Buddy_Erosion2D_Constant_Padding/1 0.308 ms 0.308 ms 2105
+Buddy_Dilation2D_Constant_Padding/1 0.306 ms 0.306 ms 2485
+Buddy_Opening2D_Constant_Padding/1 0.558 ms 0.558 ms 1103
+Buddy_Closing2D_Constant_Padding/1 0.560 ms 0.560 ms 1192
+Buddy_TopHat2D_Constant_Padding/1 0.942 ms 0.942 ms 698
+Buddy_BottomHat2D_Constant_Padding/1 0.984 ms 0.984 ms 710
+OpenCV_Erode2D_Constant_Padding/1 0.143 ms 0.143 ms 4902
+OpenCV_Opening2D_Constant_Padding/1 0.228 ms 0.228 ms 3075
+OpenCV_Closing2D_Constant_Padding/1 0.227 ms 0.227 ms 3093
+OpenCV_TopHat2D_Constant_Padding/1 0.266 ms 0.266 ms 2641
+OpenCV_BottomHat2D_Constant_Padding/1 0.266 ms 0.266 ms 2644
+OpenCV_MorphGrad2D_Constant_Padding/1 0.262 ms 0.262 ms 2674
+OpenCV_Dilate2D_Constant_Padding/1 0.143 ms 0.143 ms 4913
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
diff --git a/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json
new file mode 100644
index 00000000..a7d1c605
--- /dev/null
+++ b/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json
@@ -0,0 +1,348 @@
+{
+ "context": {
+ "date": "2025-09-07T14:19:46+00:00",
+ "host_name": "4ed4bacfe45d",
+ "executable": "./bin/image-processing-benchmark",
+ "num_cpus": 24,
+ "mhz_per_cpu": 5100,
+ "cpu_scaling_enabled": true,
+ "caches": [
+ {
+ "type": "Data",
+ "level": 1,
+ "size": 49152,
+ "num_sharing": 2
+ },
+ {
+ "type": "Instruction",
+ "level": 1,
+ "size": 32768,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 2,
+ "size": 1310720,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 3,
+ "size": 31457280,
+ "num_sharing": 24
+ }
+ ],
+ "load_avg": [3.8335,4.5332,5.3252],
+ "library_build_type": "release"
+ },
+ "benchmarks": [
+ {
+ "name": "Eigen_Convolve2D/1",
+ "family_index": 0,
+ "per_family_instance_index": 0,
+ "run_name": "Eigen_Convolve2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 138,
+ "real_time": 5.0004229111515954e+00,
+ "cpu_time": 5.0002145289855076e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "MLIR_Conv2D/1",
+ "family_index": 1,
+ "per_family_instance_index": 0,
+ "run_name": "MLIR_Conv2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 92,
+ "real_time": 7.6372407214797065e+00,
+ "cpu_time": 7.6370552065217412e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Conv2D/1",
+ "family_index": 2,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Conv2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1355,
+ "real_time": 5.2936936821444891e-01,
+ "cpu_time": 5.2935688634686340e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Corr2D_Constant_Padding/1",
+ "family_index": 3,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Corr2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 842,
+ "real_time": 8.3820865159929503e-01,
+ "cpu_time": 8.3819253444180519e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Filter2D_Constant_Padding/1",
+ "family_index": 4,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Filter2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 532,
+ "real_time": 1.3121523915376878e+00,
+ "cpu_time": 1.3121226522556384e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "family_index": 5,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4690,
+ "real_time": 1.4891786330036008e-01,
+ "cpu_time": 1.4891265415778249e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Resize2D_Bilinear_Interpolation/1",
+ "family_index": 6,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2560,
+ "real_time": 2.7395108627388254e-01,
+ "cpu_time": 2.7393973632812496e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "family_index": 7,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 100948,
+ "real_time": 6.9163674436630357e-03,
+ "cpu_time": 6.9160762570828491e-03,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Resize2D_Bilinear_Interpolation/1",
+ "family_index": 8,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Resize2D_Bilinear_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 47793,
+ "real_time": 1.4607901661653386e-02,
+ "cpu_time": 1.4607340426422288e-02,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Erosion2D_Constant_Padding/1",
+ "family_index": 9,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Erosion2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2111,
+ "real_time": 2.9704664370167710e-01,
+ "cpu_time": 2.9704162955945018e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Dilation2D_Constant_Padding/1",
+ "family_index": 10,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Dilation2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2524,
+ "real_time": 2.9669307827618913e-01,
+ "cpu_time": 2.9669044453248805e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Opening2D_Constant_Padding/1",
+ "family_index": 11,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Opening2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1162,
+ "real_time": 5.5793046040809913e-01,
+ "cpu_time": 5.5792592254733153e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Closing2D_Constant_Padding/1",
+ "family_index": 12,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Closing2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1000,
+ "real_time": 5.7320801541209221e-01,
+ "cpu_time": 5.7317745799999997e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_TopHat2D_Constant_Padding/1",
+ "family_index": 13,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_TopHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 726,
+ "real_time": 9.6233903376524110e-01,
+ "cpu_time": 9.6231244352617151e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_BottomHat2D_Constant_Padding/1",
+ "family_index": 14,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_BottomHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 698,
+ "real_time": 9.5446288244088950e-01,
+ "cpu_time": 9.5444632521490114e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Erode2D_Constant_Padding/1",
+ "family_index": 15,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Erode2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4912,
+ "real_time": 1.4231482092958514e-01,
+ "cpu_time": 1.4231082552931573e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Opening2D_Constant_Padding/1",
+ "family_index": 16,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Opening2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 3094,
+ "real_time": 2.2435946595923084e-01,
+ "cpu_time": 2.2434201357466077e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Closing2D_Constant_Padding/1",
+ "family_index": 17,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Closing2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 3059,
+ "real_time": 2.2924085333090899e-01,
+ "cpu_time": 2.2922583458646567e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_TopHat2D_Constant_Padding/1",
+ "family_index": 18,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_TopHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2607,
+ "real_time": 2.6636522107532212e-01,
+ "cpu_time": 2.6635171461449875e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_BottomHat2D_Constant_Padding/1",
+ "family_index": 19,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_BottomHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2651,
+ "real_time": 2.6092289197629281e-01,
+ "cpu_time": 2.6090935382874364e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_MorphGrad2D_Constant_Padding/1",
+ "family_index": 20,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2641,
+ "real_time": 2.6426135376518939e-01,
+ "cpu_time": 2.6424787012495315e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Dilate2D_Constant_Padding/1",
+ "family_index": 21,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Dilate2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4878,
+ "real_time": 1.4334160020455028e-01,
+ "cpu_time": 1.4333910229602290e-01,
+ "time_unit": "ms"
+ }
+ ]
+}
diff --git a/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log b/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log
new file mode 100644
index 00000000..5f275baa
--- /dev/null
+++ b/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log
@@ -0,0 +1,56 @@
+2025-09-07T14:19:46+00:00
+Running ./bin/image-processing-benchmark
+Run on (24 X 5100 MHz CPU s)
+CPU Caches:
+ L1 Data 48 KiB (x12)
+ L1 Instruction 32 KiB (x12)
+ L2 Unified 1280 KiB (x12)
+ L3 Unified 30720 KiB (x1)
+Load Average: 3.83, 4.53, 5.33
+***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
+--------------------------------------------------------------------------------------------
+Benchmark Time CPU Iterations
+--------------------------------------------------------------------------------------------
+Eigen_Convolve2D/1 5.00 ms 5.00 ms 138
+MLIR_Conv2D/1 7.64 ms 7.64 ms 92
+Buddy_Conv2D/1 0.529 ms 0.529 ms 1355
+Buddy_Corr2D_Constant_Padding/1 0.838 ms 0.838 ms 842
+OpenCV_Filter2D_Constant_Padding/1 1.31 ms 1.31 ms 532
+Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.149 ms 0.149 ms 4690
+Buddy_Resize2D_Bilinear_Interpolation/1 0.274 ms 0.274 ms 2560
+OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 100948
+OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47793
+Buddy_Erosion2D_Constant_Padding/1 0.297 ms 0.297 ms 2111
+Buddy_Dilation2D_Constant_Padding/1 0.297 ms 0.297 ms 2524
+Buddy_Opening2D_Constant_Padding/1 0.558 ms 0.558 ms 1162
+Buddy_Closing2D_Constant_Padding/1 0.573 ms 0.573 ms 1000
+Buddy_TopHat2D_Constant_Padding/1 0.962 ms 0.962 ms 726
+Buddy_BottomHat2D_Constant_Padding/1 0.954 ms 0.954 ms 698
+OpenCV_Erode2D_Constant_Padding/1 0.142 ms 0.142 ms 4912
+OpenCV_Opening2D_Constant_Padding/1 0.224 ms 0.224 ms 3094
+OpenCV_Closing2D_Constant_Padding/1 0.229 ms 0.229 ms 3059
+OpenCV_TopHat2D_Constant_Padding/1 0.266 ms 0.266 ms 2607
+OpenCV_BottomHat2D_Constant_Padding/1 0.261 ms 0.261 ms 2651
+OpenCV_MorphGrad2D_Constant_Padding/1 0.264 ms 0.264 ms 2641
+OpenCV_Dilate2D_Constant_Padding/1 0.143 ms 0.143 ms 4878
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
diff --git a/test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json
new file mode 100644
index 00000000..0bdbe2b4
--- /dev/null
+++ b/test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json
@@ -0,0 +1,348 @@
+{
+ "context": {
+ "date": "2025-09-07T14:20:10+00:00",
+ "host_name": "4ed4bacfe45d",
+ "executable": "./bin/image-processing-benchmark",
+ "num_cpus": 24,
+ "mhz_per_cpu": 5100,
+ "cpu_scaling_enabled": true,
+ "caches": [
+ {
+ "type": "Data",
+ "level": 1,
+ "size": 49152,
+ "num_sharing": 2
+ },
+ {
+ "type": "Instruction",
+ "level": 1,
+ "size": 32768,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 2,
+ "size": 1310720,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 3,
+ "size": 31457280,
+ "num_sharing": 24
+ }
+ ],
+ "load_avg": [3.54834,4.40869,5.26172],
+ "library_build_type": "release"
+ },
+ "benchmarks": [
+ {
+ "name": "Eigen_Convolve2D/1",
+ "family_index": 0,
+ "per_family_instance_index": 0,
+ "run_name": "Eigen_Convolve2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 135,
+ "real_time": 5.1553568078411951e+00,
+ "cpu_time": 5.1551722074074089e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "MLIR_Conv2D/1",
+ "family_index": 1,
+ "per_family_instance_index": 0,
+ "run_name": "MLIR_Conv2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 92,
+ "real_time": 7.5749951293287072e+00,
+ "cpu_time": 7.5747488586956546e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Conv2D/1",
+ "family_index": 2,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Conv2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1284,
+ "real_time": 5.3980214248565128e-01,
+ "cpu_time": 5.3977712928348909e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Corr2D_Constant_Padding/1",
+ "family_index": 3,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Corr2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 832,
+ "real_time": 8.4146023996604180e-01,
+ "cpu_time": 8.4142216826923055e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Filter2D_Constant_Padding/1",
+ "family_index": 4,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Filter2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 533,
+ "real_time": 1.3132831169114103e+00,
+ "cpu_time": 1.3132458273921197e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "family_index": 5,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4700,
+ "real_time": 1.4895170213694267e-01,
+ "cpu_time": 1.4894422382978711e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Resize2D_Bilinear_Interpolation/1",
+ "family_index": 6,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2571,
+ "real_time": 2.7564828269957381e-01,
+ "cpu_time": 2.7563723648385829e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "family_index": 7,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 101036,
+ "real_time": 6.9258848736223769e-03,
+ "cpu_time": 6.9255131933172341e-03,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Resize2D_Bilinear_Interpolation/1",
+ "family_index": 8,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Resize2D_Bilinear_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 47859,
+ "real_time": 1.4628459030991201e-02,
+ "cpu_time": 1.4628155059654391e-02,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Erosion2D_Constant_Padding/1",
+ "family_index": 9,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Erosion2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2611,
+ "real_time": 2.7920378351385045e-01,
+ "cpu_time": 2.7918063960168527e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Dilation2D_Constant_Padding/1",
+ "family_index": 10,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Dilation2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2244,
+ "real_time": 2.7905655742594693e-01,
+ "cpu_time": 2.7904662745097980e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Opening2D_Constant_Padding/1",
+ "family_index": 11,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Opening2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1236,
+ "real_time": 5.2211519246348281e-01,
+ "cpu_time": 5.2209284142394796e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Closing2D_Constant_Padding/1",
+ "family_index": 12,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Closing2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1165,
+ "real_time": 4.4711210760947462e-01,
+ "cpu_time": 4.4709575622317560e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_TopHat2D_Constant_Padding/1",
+ "family_index": 13,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_TopHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 715,
+ "real_time": 9.3931627127674078e-01,
+ "cpu_time": 9.3925903916084019e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_BottomHat2D_Constant_Padding/1",
+ "family_index": 14,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_BottomHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 723,
+ "real_time": 9.5567062888732424e-01,
+ "cpu_time": 9.5561843983402406e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Erode2D_Constant_Padding/1",
+ "family_index": 15,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Erode2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4917,
+ "real_time": 1.4225059871341922e-01,
+ "cpu_time": 1.4224726479560690e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Opening2D_Constant_Padding/1",
+ "family_index": 16,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Opening2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2994,
+ "real_time": 2.3417615903443151e-01,
+ "cpu_time": 2.3416278323313275e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Closing2D_Constant_Padding/1",
+ "family_index": 17,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Closing2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2975,
+ "real_time": 2.3566543179399826e-01,
+ "cpu_time": 2.3564990386554591e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_TopHat2D_Constant_Padding/1",
+ "family_index": 18,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_TopHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2582,
+ "real_time": 2.6798345968069537e-01,
+ "cpu_time": 2.6797476646010809e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_BottomHat2D_Constant_Padding/1",
+ "family_index": 19,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_BottomHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2607,
+ "real_time": 2.7319310375178141e-01,
+ "cpu_time": 2.7318398925968451e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_MorphGrad2D_Constant_Padding/1",
+ "family_index": 20,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2649,
+ "real_time": 2.6686968568047292e-01,
+ "cpu_time": 2.6686162136655345e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Dilate2D_Constant_Padding/1",
+ "family_index": 21,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Dilate2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4944,
+ "real_time": 1.4209808046329755e-01,
+ "cpu_time": 1.4209095307443359e-01,
+ "time_unit": "ms"
+ }
+ ]
+}
diff --git a/test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log b/test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log
new file mode 100644
index 00000000..29391a63
--- /dev/null
+++ b/test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log
@@ -0,0 +1,56 @@
+2025-09-07T14:20:10+00:00
+Running ./bin/image-processing-benchmark
+Run on (24 X 5100 MHz CPU s)
+CPU Caches:
+ L1 Data 48 KiB (x12)
+ L1 Instruction 32 KiB (x12)
+ L2 Unified 1280 KiB (x12)
+ L3 Unified 30720 KiB (x1)
+Load Average: 3.55, 4.41, 5.26
+***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
+--------------------------------------------------------------------------------------------
+Benchmark Time CPU Iterations
+--------------------------------------------------------------------------------------------
+Eigen_Convolve2D/1 5.16 ms 5.16 ms 135
+MLIR_Conv2D/1 7.57 ms 7.57 ms 92
+Buddy_Conv2D/1 0.540 ms 0.540 ms 1284
+Buddy_Corr2D_Constant_Padding/1 0.841 ms 0.841 ms 832
+OpenCV_Filter2D_Constant_Padding/1 1.31 ms 1.31 ms 533
+Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.149 ms 0.149 ms 4700
+Buddy_Resize2D_Bilinear_Interpolation/1 0.276 ms 0.276 ms 2571
+OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 101036
+OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47859
+Buddy_Erosion2D_Constant_Padding/1 0.279 ms 0.279 ms 2611
+Buddy_Dilation2D_Constant_Padding/1 0.279 ms 0.279 ms 2244
+Buddy_Opening2D_Constant_Padding/1 0.522 ms 0.522 ms 1236
+Buddy_Closing2D_Constant_Padding/1 0.447 ms 0.447 ms 1165
+Buddy_TopHat2D_Constant_Padding/1 0.939 ms 0.939 ms 715
+Buddy_BottomHat2D_Constant_Padding/1 0.956 ms 0.956 ms 723
+OpenCV_Erode2D_Constant_Padding/1 0.142 ms 0.142 ms 4917
+OpenCV_Opening2D_Constant_Padding/1 0.234 ms 0.234 ms 2994
+OpenCV_Closing2D_Constant_Padding/1 0.236 ms 0.236 ms 2975
+OpenCV_TopHat2D_Constant_Padding/1 0.268 ms 0.268 ms 2582
+OpenCV_BottomHat2D_Constant_Padding/1 0.273 ms 0.273 ms 2607
+OpenCV_MorphGrad2D_Constant_Padding/1 0.267 ms 0.267 ms 2649
+OpenCV_Dilate2D_Constant_Padding/1 0.142 ms 0.142 ms 4944
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
diff --git a/test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json
new file mode 100644
index 00000000..f2f95365
--- /dev/null
+++ b/test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json
@@ -0,0 +1,348 @@
+{
+ "context": {
+ "date": "2025-09-07T14:20:33+00:00",
+ "host_name": "4ed4bacfe45d",
+ "executable": "./bin/image-processing-benchmark",
+ "num_cpus": 24,
+ "mhz_per_cpu": 5100,
+ "cpu_scaling_enabled": true,
+ "caches": [
+ {
+ "type": "Data",
+ "level": 1,
+ "size": 49152,
+ "num_sharing": 2
+ },
+ {
+ "type": "Instruction",
+ "level": 1,
+ "size": 32768,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 2,
+ "size": 1310720,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 3,
+ "size": 31457280,
+ "num_sharing": 24
+ }
+ ],
+ "load_avg": [3.47266,4.33301,5.21826],
+ "library_build_type": "release"
+ },
+ "benchmarks": [
+ {
+ "name": "Eigen_Convolve2D/1",
+ "family_index": 0,
+ "per_family_instance_index": 0,
+ "run_name": "Eigen_Convolve2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 139,
+ "real_time": 5.0128592754439483e+00,
+ "cpu_time": 5.0124305971223029e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "MLIR_Conv2D/1",
+ "family_index": 1,
+ "per_family_instance_index": 0,
+ "run_name": "MLIR_Conv2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 92,
+ "real_time": 7.6231503373254901e+00,
+ "cpu_time": 7.6229129565217395e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Conv2D/1",
+ "family_index": 2,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Conv2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1298,
+ "real_time": 5.2513539481144655e-01,
+ "cpu_time": 5.2512819799691823e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Corr2D_Constant_Padding/1",
+ "family_index": 3,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Corr2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 840,
+ "real_time": 8.3748531926955494e-01,
+ "cpu_time": 8.3737668214285732e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Filter2D_Constant_Padding/1",
+ "family_index": 4,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Filter2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 532,
+ "real_time": 1.3121627901393669e+00,
+ "cpu_time": 1.3119710808270681e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "family_index": 5,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4720,
+ "real_time": 1.4909265436610933e-01,
+ "cpu_time": 1.4907397690677973e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Resize2D_Bilinear_Interpolation/1",
+ "family_index": 6,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2565,
+ "real_time": 2.7453704332282902e-01,
+ "cpu_time": 2.7450112709551661e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "family_index": 7,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 100545,
+ "real_time": 6.9568862453947087e-03,
+ "cpu_time": 6.9558761649012911e-03,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Resize2D_Bilinear_Interpolation/1",
+ "family_index": 8,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Resize2D_Bilinear_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 47892,
+ "real_time": 1.4604156723145325e-02,
+ "cpu_time": 1.4601928923410997e-02,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Erosion2D_Constant_Padding/1",
+ "family_index": 9,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Erosion2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2439,
+ "real_time": 2.9473153659680573e-01,
+ "cpu_time": 2.9469328085280849e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Dilation2D_Constant_Padding/1",
+ "family_index": 10,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Dilation2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2168,
+ "real_time": 2.8830999164838633e-01,
+ "cpu_time": 2.8826715083025839e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Opening2D_Constant_Padding/1",
+ "family_index": 11,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Opening2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1168,
+ "real_time": 4.9684155566541299e-01,
+ "cpu_time": 4.9675358390410967e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Closing2D_Constant_Padding/1",
+ "family_index": 12,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Closing2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1106,
+ "real_time": 4.7481091016024407e-01,
+ "cpu_time": 4.7480679113924112e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_TopHat2D_Constant_Padding/1",
+ "family_index": 13,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_TopHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 689,
+ "real_time": 9.6393543659170067e-01,
+ "cpu_time": 9.6392819158200227e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_BottomHat2D_Constant_Padding/1",
+ "family_index": 14,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_BottomHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 707,
+ "real_time": 9.6352926364545266e-01,
+ "cpu_time": 9.6352213578500723e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Erode2D_Constant_Padding/1",
+ "family_index": 15,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Erode2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4934,
+ "real_time": 1.4176106599114616e-01,
+ "cpu_time": 1.4175841284961477e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Opening2D_Constant_Padding/1",
+ "family_index": 16,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Opening2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 3072,
+ "real_time": 2.2831910731232105e-01,
+ "cpu_time": 2.2831633561197898e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Closing2D_Constant_Padding/1",
+ "family_index": 17,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Closing2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 3104,
+ "real_time": 2.2609134987188675e-01,
+ "cpu_time": 2.2608958762886630e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_TopHat2D_Constant_Padding/1",
+ "family_index": 18,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_TopHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2646,
+ "real_time": 2.6597667595291352e-01,
+ "cpu_time": 2.6597261753590368e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_BottomHat2D_Constant_Padding/1",
+ "family_index": 19,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_BottomHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2601,
+ "real_time": 2.6558996427063758e-01,
+ "cpu_time": 2.6558628604382972e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_MorphGrad2D_Constant_Padding/1",
+ "family_index": 20,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2659,
+ "real_time": 2.6361091008487614e-01,
+ "cpu_time": 2.6360892515983414e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Dilate2D_Constant_Padding/1",
+ "family_index": 21,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Dilate2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4871,
+ "real_time": 1.4462083764718411e-01,
+ "cpu_time": 1.4461981831246187e-01,
+ "time_unit": "ms"
+ }
+ ]
+}
diff --git a/test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log b/test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log
new file mode 100644
index 00000000..136e2000
--- /dev/null
+++ b/test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log
@@ -0,0 +1,56 @@
+2025-09-07T14:20:33+00:00
+Running ./bin/image-processing-benchmark
+Run on (24 X 5100 MHz CPU s)
+CPU Caches:
+ L1 Data 48 KiB (x12)
+ L1 Instruction 32 KiB (x12)
+ L2 Unified 1280 KiB (x12)
+ L3 Unified 30720 KiB (x1)
+Load Average: 3.47, 4.33, 5.22
+***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
+--------------------------------------------------------------------------------------------
+Benchmark Time CPU Iterations
+--------------------------------------------------------------------------------------------
+Eigen_Convolve2D/1 5.01 ms 5.01 ms 139
+MLIR_Conv2D/1 7.62 ms 7.62 ms 92
+Buddy_Conv2D/1 0.525 ms 0.525 ms 1298
+Buddy_Corr2D_Constant_Padding/1 0.837 ms 0.837 ms 840
+OpenCV_Filter2D_Constant_Padding/1 1.31 ms 1.31 ms 532
+Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.149 ms 0.149 ms 4720
+Buddy_Resize2D_Bilinear_Interpolation/1 0.275 ms 0.275 ms 2565
+OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 100545
+OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47892
+Buddy_Erosion2D_Constant_Padding/1 0.295 ms 0.295 ms 2439
+Buddy_Dilation2D_Constant_Padding/1 0.288 ms 0.288 ms 2168
+Buddy_Opening2D_Constant_Padding/1 0.497 ms 0.497 ms 1168
+Buddy_Closing2D_Constant_Padding/1 0.475 ms 0.475 ms 1106
+Buddy_TopHat2D_Constant_Padding/1 0.964 ms 0.964 ms 689
+Buddy_BottomHat2D_Constant_Padding/1 0.964 ms 0.964 ms 707
+OpenCV_Erode2D_Constant_Padding/1 0.142 ms 0.142 ms 4934
+OpenCV_Opening2D_Constant_Padding/1 0.228 ms 0.228 ms 3072
+OpenCV_Closing2D_Constant_Padding/1 0.226 ms 0.226 ms 3104
+OpenCV_TopHat2D_Constant_Padding/1 0.266 ms 0.266 ms 2646
+OpenCV_BottomHat2D_Constant_Padding/1 0.266 ms 0.266 ms 2601
+OpenCV_MorphGrad2D_Constant_Padding/1 0.264 ms 0.264 ms 2659
+OpenCV_Dilate2D_Constant_Padding/1 0.145 ms 0.145 ms 4871
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
diff --git a/test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json
new file mode 100644
index 00000000..2731abdb
--- /dev/null
+++ b/test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json
@@ -0,0 +1,348 @@
+{
+ "context": {
+ "date": "2025-09-07T14:20:55+00:00",
+ "host_name": "4ed4bacfe45d",
+ "executable": "./bin/image-processing-benchmark",
+ "num_cpus": 24,
+ "mhz_per_cpu": 5100,
+ "cpu_scaling_enabled": true,
+ "caches": [
+ {
+ "type": "Data",
+ "level": 1,
+ "size": 49152,
+ "num_sharing": 2
+ },
+ {
+ "type": "Instruction",
+ "level": 1,
+ "size": 32768,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 2,
+ "size": 1310720,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 3,
+ "size": 31457280,
+ "num_sharing": 24
+ }
+ ],
+ "load_avg": [3.31006,4.22461,5.1582],
+ "library_build_type": "release"
+ },
+ "benchmarks": [
+ {
+ "name": "Eigen_Convolve2D/1",
+ "family_index": 0,
+ "per_family_instance_index": 0,
+ "run_name": "Eigen_Convolve2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 57,
+ "real_time": 1.2186227660430106e+01,
+ "cpu_time": 1.2185617228070177e+01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "MLIR_Conv2D/1",
+ "family_index": 1,
+ "per_family_instance_index": 0,
+ "run_name": "MLIR_Conv2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 23,
+ "real_time": 3.0569540579681810e+01,
+ "cpu_time": 3.0567936217391306e+01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Conv2D/1",
+ "family_index": 2,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Conv2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 212,
+ "real_time": 3.2612147899168842e+00,
+ "cpu_time": 3.2610398018867928e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Corr2D_Constant_Padding/1",
+ "family_index": 3,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Corr2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 287,
+ "real_time": 2.4475226055454296e+00,
+ "cpu_time": 2.4473893937282227e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Filter2D_Constant_Padding/1",
+ "family_index": 4,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Filter2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 162,
+ "real_time": 4.3507855339550678e+00,
+ "cpu_time": 4.3502307716049353e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "family_index": 5,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4702,
+ "real_time": 1.4982172392977394e-01,
+ "cpu_time": 1.4980982752020414e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Resize2D_Bilinear_Interpolation/1",
+ "family_index": 6,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2551,
+ "real_time": 2.7482590690307734e-01,
+ "cpu_time": 2.7479107918463358e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "family_index": 7,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 101196,
+ "real_time": 6.9294706260273542e-03,
+ "cpu_time": 6.9288548559231639e-03,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Resize2D_Bilinear_Interpolation/1",
+ "family_index": 8,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Resize2D_Bilinear_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 47024,
+ "real_time": 1.4854396933289805e-02,
+ "cpu_time": 1.4854171763354861e-02,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Erosion2D_Constant_Padding/1",
+ "family_index": 9,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Erosion2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2302,
+ "real_time": 3.2017572251327342e-01,
+ "cpu_time": 3.2016908123371007e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Dilation2D_Constant_Padding/1",
+ "family_index": 10,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Dilation2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2501,
+ "real_time": 3.0623350749726963e-01,
+ "cpu_time": 3.0622376009596158e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Opening2D_Constant_Padding/1",
+ "family_index": 11,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Opening2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1120,
+ "real_time": 5.8248242296810659e-01,
+ "cpu_time": 5.8245296071428587e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Closing2D_Constant_Padding/1",
+ "family_index": 12,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Closing2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1000,
+ "real_time": 5.7962449267506599e-01,
+ "cpu_time": 5.7961194700000007e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_TopHat2D_Constant_Padding/1",
+ "family_index": 13,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_TopHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 745,
+ "real_time": 9.4148450649824722e-01,
+ "cpu_time": 9.4141236644295290e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_BottomHat2D_Constant_Padding/1",
+ "family_index": 14,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_BottomHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 714,
+ "real_time": 9.2218336792720135e-01,
+ "cpu_time": 9.2215487114845818e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Erode2D_Constant_Padding/1",
+ "family_index": 15,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Erode2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4928,
+ "real_time": 1.4172616798745155e-01,
+ "cpu_time": 1.4172150892857185e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Opening2D_Constant_Padding/1",
+ "family_index": 16,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Opening2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 3029,
+ "real_time": 2.3071269989604012e-01,
+ "cpu_time": 2.3070404060746205e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Closing2D_Constant_Padding/1",
+ "family_index": 17,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Closing2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 3021,
+ "real_time": 2.3122206552263050e-01,
+ "cpu_time": 2.3121828334988390e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_TopHat2D_Constant_Padding/1",
+ "family_index": 18,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_TopHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2590,
+ "real_time": 2.7057007674314804e-01,
+ "cpu_time": 2.7055772625482621e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_BottomHat2D_Constant_Padding/1",
+ "family_index": 19,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_BottomHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2602,
+ "real_time": 2.6801066311344013e-01,
+ "cpu_time": 2.6800748847040845e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_MorphGrad2D_Constant_Padding/1",
+ "family_index": 20,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2621,
+ "real_time": 2.6615193267898313e-01,
+ "cpu_time": 2.6614443151468892e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Dilate2D_Constant_Padding/1",
+ "family_index": 21,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Dilate2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4881,
+ "real_time": 1.4305517117133568e-01,
+ "cpu_time": 1.4305138004507262e-01,
+ "time_unit": "ms"
+ }
+ ]
+}
diff --git a/test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log b/test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log
new file mode 100644
index 00000000..ff61fa5e
--- /dev/null
+++ b/test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log
@@ -0,0 +1,56 @@
+2025-09-07T14:20:55+00:00
+Running ./bin/image-processing-benchmark
+Run on (24 X 5100 MHz CPU s)
+CPU Caches:
+ L1 Data 48 KiB (x12)
+ L1 Instruction 32 KiB (x12)
+ L2 Unified 1280 KiB (x12)
+ L3 Unified 30720 KiB (x1)
+Load Average: 3.31, 4.22, 5.16
+***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
+--------------------------------------------------------------------------------------------
+Benchmark Time CPU Iterations
+--------------------------------------------------------------------------------------------
+Eigen_Convolve2D/1 12.2 ms 12.2 ms 57
+MLIR_Conv2D/1 30.6 ms 30.6 ms 23
+Buddy_Conv2D/1 3.26 ms 3.26 ms 212
+Buddy_Corr2D_Constant_Padding/1 2.45 ms 2.45 ms 287
+OpenCV_Filter2D_Constant_Padding/1 4.35 ms 4.35 ms 162
+Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.150 ms 0.150 ms 4702
+Buddy_Resize2D_Bilinear_Interpolation/1 0.275 ms 0.275 ms 2551
+OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 101196
+OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47024
+Buddy_Erosion2D_Constant_Padding/1 0.320 ms 0.320 ms 2302
+Buddy_Dilation2D_Constant_Padding/1 0.306 ms 0.306 ms 2501
+Buddy_Opening2D_Constant_Padding/1 0.582 ms 0.582 ms 1120
+Buddy_Closing2D_Constant_Padding/1 0.580 ms 0.580 ms 1000
+Buddy_TopHat2D_Constant_Padding/1 0.941 ms 0.941 ms 745
+Buddy_BottomHat2D_Constant_Padding/1 0.922 ms 0.922 ms 714
+OpenCV_Erode2D_Constant_Padding/1 0.142 ms 0.142 ms 4928
+OpenCV_Opening2D_Constant_Padding/1 0.231 ms 0.231 ms 3029
+OpenCV_Closing2D_Constant_Padding/1 0.231 ms 0.231 ms 3021
+OpenCV_TopHat2D_Constant_Padding/1 0.271 ms 0.271 ms 2590
+OpenCV_BottomHat2D_Constant_Padding/1 0.268 ms 0.268 ms 2602
+OpenCV_MorphGrad2D_Constant_Padding/1 0.266 ms 0.266 ms 2621
+OpenCV_Dilate2D_Constant_Padding/1 0.143 ms 0.143 ms 4881
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
diff --git a/test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json
new file mode 100644
index 00000000..0c01eb95
--- /dev/null
+++ b/test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json
@@ -0,0 +1,348 @@
+{
+ "context": {
+ "date": "2025-09-07T14:21:20+00:00",
+ "host_name": "4ed4bacfe45d",
+ "executable": "./bin/image-processing-benchmark",
+ "num_cpus": 24,
+ "mhz_per_cpu": 5100,
+ "cpu_scaling_enabled": true,
+ "caches": [
+ {
+ "type": "Data",
+ "level": 1,
+ "size": 49152,
+ "num_sharing": 2
+ },
+ {
+ "type": "Instruction",
+ "level": 1,
+ "size": 32768,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 2,
+ "size": 1310720,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 3,
+ "size": 31457280,
+ "num_sharing": 24
+ }
+ ],
+ "load_avg": [3.26123,4.14062,5.10498],
+ "library_build_type": "release"
+ },
+ "benchmarks": [
+ {
+ "name": "Eigen_Convolve2D/1",
+ "family_index": 0,
+ "per_family_instance_index": 0,
+ "run_name": "Eigen_Convolve2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 58,
+ "real_time": 1.2124883910191469e+01,
+ "cpu_time": 1.2124201741379311e+01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "MLIR_Conv2D/1",
+ "family_index": 1,
+ "per_family_instance_index": 0,
+ "run_name": "MLIR_Conv2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 23,
+ "real_time": 3.0509588349124659e+01,
+ "cpu_time": 3.0508405608695647e+01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Conv2D/1",
+ "family_index": 2,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Conv2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 210,
+ "real_time": 3.3209008652539480e+00,
+ "cpu_time": 3.3204979476190473e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Corr2D_Constant_Padding/1",
+ "family_index": 3,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Corr2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 284,
+ "real_time": 2.4634649268758131e+00,
+ "cpu_time": 2.4633182781690137e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Filter2D_Constant_Padding/1",
+ "family_index": 4,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Filter2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 163,
+ "real_time": 4.3028585064264897e+00,
+ "cpu_time": 4.3025189877300605e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "family_index": 5,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4709,
+ "real_time": 1.4914729984278990e-01,
+ "cpu_time": 1.4913447419834347e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Resize2D_Bilinear_Interpolation/1",
+ "family_index": 6,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2576,
+ "real_time": 2.7474065843437401e-01,
+ "cpu_time": 2.7471746855590068e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "family_index": 7,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 101120,
+ "real_time": 6.9243410864514828e-03,
+ "cpu_time": 6.9239005241297502e-03,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Resize2D_Bilinear_Interpolation/1",
+ "family_index": 8,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Resize2D_Bilinear_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 47365,
+ "real_time": 1.4754662157099186e-02,
+ "cpu_time": 1.4753382729863838e-02,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Erosion2D_Constant_Padding/1",
+ "family_index": 9,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Erosion2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2236,
+ "real_time": 3.0625470928941090e-01,
+ "cpu_time": 3.0622646198568843e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Dilation2D_Constant_Padding/1",
+ "family_index": 10,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Dilation2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2168,
+ "real_time": 2.9962818306695493e-01,
+ "cpu_time": 2.9958592850553478e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Opening2D_Constant_Padding/1",
+ "family_index": 11,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Opening2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1149,
+ "real_time": 5.4589507240539437e-01,
+ "cpu_time": 5.4582348563968719e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Closing2D_Constant_Padding/1",
+ "family_index": 12,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Closing2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1158,
+ "real_time": 4.3890949512392746e-01,
+ "cpu_time": 4.3878794473229726e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_TopHat2D_Constant_Padding/1",
+ "family_index": 13,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_TopHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 546,
+ "real_time": 9.8986280979690966e-01,
+ "cpu_time": 9.8963964468864618e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_BottomHat2D_Constant_Padding/1",
+ "family_index": 14,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_BottomHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 707,
+ "real_time": 9.5082524916238798e-01,
+ "cpu_time": 9.5070903111739746e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Erode2D_Constant_Padding/1",
+ "family_index": 15,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Erode2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4900,
+ "real_time": 1.4259748845076076e-01,
+ "cpu_time": 1.4257857632653057e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Opening2D_Constant_Padding/1",
+ "family_index": 16,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Opening2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2916,
+ "real_time": 2.3652738105858304e-01,
+ "cpu_time": 2.3650978943758605e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Closing2D_Constant_Padding/1",
+ "family_index": 17,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Closing2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2906,
+ "real_time": 2.4044473193385890e-01,
+ "cpu_time": 2.4042902030282134e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_TopHat2D_Constant_Padding/1",
+ "family_index": 18,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_TopHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2393,
+ "real_time": 2.8040418241794168e-01,
+ "cpu_time": 2.8036821521103100e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_BottomHat2D_Constant_Padding/1",
+ "family_index": 19,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_BottomHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2495,
+ "real_time": 2.7553062579913701e-01,
+ "cpu_time": 2.7551413947895820e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_MorphGrad2D_Constant_Padding/1",
+ "family_index": 20,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2554,
+ "real_time": 2.7534571463882501e-01,
+ "cpu_time": 2.7532446162881763e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Dilate2D_Constant_Padding/1",
+ "family_index": 21,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Dilate2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4820,
+ "real_time": 1.4465537957878033e-01,
+ "cpu_time": 1.4464847676348577e-01,
+ "time_unit": "ms"
+ }
+ ]
+}
diff --git a/test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log b/test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log
new file mode 100644
index 00000000..b98f2736
--- /dev/null
+++ b/test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log
@@ -0,0 +1,56 @@
+2025-09-07T14:21:20+00:00
+Running ./bin/image-processing-benchmark
+Run on (24 X 5100 MHz CPU s)
+CPU Caches:
+ L1 Data 48 KiB (x12)
+ L1 Instruction 32 KiB (x12)
+ L2 Unified 1280 KiB (x12)
+ L3 Unified 30720 KiB (x1)
+Load Average: 3.26, 4.14, 5.10
+***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
+--------------------------------------------------------------------------------------------
+Benchmark Time CPU Iterations
+--------------------------------------------------------------------------------------------
+Eigen_Convolve2D/1 12.1 ms 12.1 ms 58
+MLIR_Conv2D/1 30.5 ms 30.5 ms 23
+Buddy_Conv2D/1 3.32 ms 3.32 ms 210
+Buddy_Corr2D_Constant_Padding/1 2.46 ms 2.46 ms 284
+OpenCV_Filter2D_Constant_Padding/1 4.30 ms 4.30 ms 163
+Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.149 ms 0.149 ms 4709
+Buddy_Resize2D_Bilinear_Interpolation/1 0.275 ms 0.275 ms 2576
+OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 101120
+OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47365
+Buddy_Erosion2D_Constant_Padding/1 0.306 ms 0.306 ms 2236
+Buddy_Dilation2D_Constant_Padding/1 0.300 ms 0.300 ms 2168
+Buddy_Opening2D_Constant_Padding/1 0.546 ms 0.546 ms 1149
+Buddy_Closing2D_Constant_Padding/1 0.439 ms 0.439 ms 1158
+Buddy_TopHat2D_Constant_Padding/1 0.990 ms 0.990 ms 546
+Buddy_BottomHat2D_Constant_Padding/1 0.951 ms 0.951 ms 707
+OpenCV_Erode2D_Constant_Padding/1 0.143 ms 0.143 ms 4900
+OpenCV_Opening2D_Constant_Padding/1 0.237 ms 0.237 ms 2916
+OpenCV_Closing2D_Constant_Padding/1 0.240 ms 0.240 ms 2906
+OpenCV_TopHat2D_Constant_Padding/1 0.280 ms 0.280 ms 2393
+OpenCV_BottomHat2D_Constant_Padding/1 0.276 ms 0.276 ms 2495
+OpenCV_MorphGrad2D_Constant_Padding/1 0.275 ms 0.275 ms 2554
+OpenCV_Dilate2D_Constant_Padding/1 0.145 ms 0.145 ms 4820
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
diff --git a/test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json
new file mode 100644
index 00000000..b48f7511
--- /dev/null
+++ b/test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json
@@ -0,0 +1,348 @@
+{
+ "context": {
+ "date": "2025-09-07T14:21:44+00:00",
+ "host_name": "4ed4bacfe45d",
+ "executable": "./bin/image-processing-benchmark",
+ "num_cpus": 24,
+ "mhz_per_cpu": 5100,
+ "cpu_scaling_enabled": true,
+ "caches": [
+ {
+ "type": "Data",
+ "level": 1,
+ "size": 49152,
+ "num_sharing": 2
+ },
+ {
+ "type": "Instruction",
+ "level": 1,
+ "size": 32768,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 2,
+ "size": 1310720,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 3,
+ "size": 31457280,
+ "num_sharing": 24
+ }
+ ],
+ "load_avg": [3.1709,4.04785,5.04785],
+ "library_build_type": "release"
+ },
+ "benchmarks": [
+ {
+ "name": "Eigen_Convolve2D/1",
+ "family_index": 0,
+ "per_family_instance_index": 0,
+ "run_name": "Eigen_Convolve2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 31,
+ "real_time": 2.2453694574294552e+01,
+ "cpu_time": 2.2453529516129038e+01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "MLIR_Conv2D/1",
+ "family_index": 1,
+ "per_family_instance_index": 0,
+ "run_name": "MLIR_Conv2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 10,
+ "real_time": 6.9538136571645737e+01,
+ "cpu_time": 6.9535859299999998e+01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Conv2D/1",
+ "family_index": 2,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Conv2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 105,
+ "real_time": 6.5831756662754781e+00,
+ "cpu_time": 6.5830738380952374e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Corr2D_Constant_Padding/1",
+ "family_index": 3,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Corr2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 145,
+ "real_time": 4.8232713906929412e+00,
+ "cpu_time": 4.8231763103448246e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Filter2D_Constant_Padding/1",
+ "family_index": 4,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Filter2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 78,
+ "real_time": 8.9926493760102826e+00,
+ "cpu_time": 8.9925164102564121e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "family_index": 5,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4719,
+ "real_time": 1.4828112111376565e-01,
+ "cpu_time": 1.4827689298580210e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Resize2D_Bilinear_Interpolation/1",
+ "family_index": 6,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2578,
+ "real_time": 2.7298830056532675e-01,
+ "cpu_time": 2.7298321722265312e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "family_index": 7,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 101158,
+ "real_time": 6.9221397090508292e-03,
+ "cpu_time": 6.9218924257102776e-03,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Resize2D_Bilinear_Interpolation/1",
+ "family_index": 8,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Resize2D_Bilinear_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 47881,
+ "real_time": 1.4618278297898276e-02,
+ "cpu_time": 1.4618093627952623e-02,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Erosion2D_Constant_Padding/1",
+ "family_index": 9,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Erosion2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2555,
+ "real_time": 2.8827243325994906e-01,
+ "cpu_time": 2.8826175381604680e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Dilation2D_Constant_Padding/1",
+ "family_index": 10,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Dilation2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2332,
+ "real_time": 2.7560231980338923e-01,
+ "cpu_time": 2.7559355960548881e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Opening2D_Constant_Padding/1",
+ "family_index": 11,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Opening2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1208,
+ "real_time": 5.0620788042217690e-01,
+ "cpu_time": 5.0618569039735117e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Closing2D_Constant_Padding/1",
+ "family_index": 12,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Closing2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1084,
+ "real_time": 5.1128306182108241e-01,
+ "cpu_time": 5.1127205996309932e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_TopHat2D_Constant_Padding/1",
+ "family_index": 13,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_TopHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 712,
+ "real_time": 9.4518835101737064e-01,
+ "cpu_time": 9.4518094662921193e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_BottomHat2D_Constant_Padding/1",
+ "family_index": 14,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_BottomHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 712,
+ "real_time": 9.4133523407946806e-01,
+ "cpu_time": 9.4132780337078559e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Erode2D_Constant_Padding/1",
+ "family_index": 15,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Erode2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4962,
+ "real_time": 1.4084929968983068e-01,
+ "cpu_time": 1.4084782728738404e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Opening2D_Constant_Padding/1",
+ "family_index": 16,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Opening2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2964,
+ "real_time": 2.3630762148482598e-01,
+ "cpu_time": 2.3630250944669354e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Closing2D_Constant_Padding/1",
+ "family_index": 17,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Closing2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2964,
+ "real_time": 2.3484633740304131e-01,
+ "cpu_time": 2.3484455229419723e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_TopHat2D_Constant_Padding/1",
+ "family_index": 18,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_TopHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2567,
+ "real_time": 2.7368364135114703e-01,
+ "cpu_time": 2.7367765095442176e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_BottomHat2D_Constant_Padding/1",
+ "family_index": 19,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_BottomHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2585,
+ "real_time": 2.7532173425132117e-01,
+ "cpu_time": 2.7531788704061916e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_MorphGrad2D_Constant_Padding/1",
+ "family_index": 20,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2623,
+ "real_time": 2.6751557039114704e-01,
+ "cpu_time": 2.6751091498284435e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Dilate2D_Constant_Padding/1",
+ "family_index": 21,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Dilate2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4903,
+ "real_time": 1.4305556644694115e-01,
+ "cpu_time": 1.4305444992861513e-01,
+ "time_unit": "ms"
+ }
+ ]
+}
diff --git a/test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log b/test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log
new file mode 100644
index 00000000..eada4cc4
--- /dev/null
+++ b/test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log
@@ -0,0 +1,56 @@
+2025-09-07T14:21:44+00:00
+Running ./bin/image-processing-benchmark
+Run on (24 X 5100 MHz CPU s)
+CPU Caches:
+ L1 Data 48 KiB (x12)
+ L1 Instruction 32 KiB (x12)
+ L2 Unified 1280 KiB (x12)
+ L3 Unified 30720 KiB (x1)
+Load Average: 3.17, 4.05, 5.05
+***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
+--------------------------------------------------------------------------------------------
+Benchmark Time CPU Iterations
+--------------------------------------------------------------------------------------------
+Eigen_Convolve2D/1 22.5 ms 22.5 ms 31
+MLIR_Conv2D/1 69.5 ms 69.5 ms 10
+Buddy_Conv2D/1 6.58 ms 6.58 ms 105
+Buddy_Corr2D_Constant_Padding/1 4.82 ms 4.82 ms 145
+OpenCV_Filter2D_Constant_Padding/1 8.99 ms 8.99 ms 78
+Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.148 ms 0.148 ms 4719
+Buddy_Resize2D_Bilinear_Interpolation/1 0.273 ms 0.273 ms 2578
+OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 101158
+OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47881
+Buddy_Erosion2D_Constant_Padding/1 0.288 ms 0.288 ms 2555
+Buddy_Dilation2D_Constant_Padding/1 0.276 ms 0.276 ms 2332
+Buddy_Opening2D_Constant_Padding/1 0.506 ms 0.506 ms 1208
+Buddy_Closing2D_Constant_Padding/1 0.511 ms 0.511 ms 1084
+Buddy_TopHat2D_Constant_Padding/1 0.945 ms 0.945 ms 712
+Buddy_BottomHat2D_Constant_Padding/1 0.941 ms 0.941 ms 712
+OpenCV_Erode2D_Constant_Padding/1 0.141 ms 0.141 ms 4962
+OpenCV_Opening2D_Constant_Padding/1 0.236 ms 0.236 ms 2964
+OpenCV_Closing2D_Constant_Padding/1 0.235 ms 0.235 ms 2964
+OpenCV_TopHat2D_Constant_Padding/1 0.274 ms 0.274 ms 2567
+OpenCV_BottomHat2D_Constant_Padding/1 0.275 ms 0.275 ms 2585
+OpenCV_MorphGrad2D_Constant_Padding/1 0.268 ms 0.268 ms 2623
+OpenCV_Dilate2D_Constant_Padding/1 0.143 ms 0.143 ms 4903
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
diff --git a/test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json
new file mode 100644
index 00000000..6a4e41aa
--- /dev/null
+++ b/test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json
@@ -0,0 +1,348 @@
+{
+ "context": {
+ "date": "2025-09-07T14:22:08+00:00",
+ "host_name": "4ed4bacfe45d",
+ "executable": "./bin/image-processing-benchmark",
+ "num_cpus": 24,
+ "mhz_per_cpu": 5100,
+ "cpu_scaling_enabled": true,
+ "caches": [
+ {
+ "type": "Data",
+ "level": 1,
+ "size": 49152,
+ "num_sharing": 2
+ },
+ {
+ "type": "Instruction",
+ "level": 1,
+ "size": 32768,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 2,
+ "size": 1310720,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 3,
+ "size": 31457280,
+ "num_sharing": 24
+ }
+ ],
+ "load_avg": [3.12158,3.979,5.00293],
+ "library_build_type": "release"
+ },
+ "benchmarks": [
+ {
+ "name": "Eigen_Convolve2D/1",
+ "family_index": 0,
+ "per_family_instance_index": 0,
+ "run_name": "Eigen_Convolve2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 31,
+ "real_time": 2.2618762908443326e+01,
+ "cpu_time": 2.2618371935483868e+01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "MLIR_Conv2D/1",
+ "family_index": 1,
+ "per_family_instance_index": 0,
+ "run_name": "MLIR_Conv2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 10,
+ "real_time": 6.9524862989783287e+01,
+ "cpu_time": 6.9523626999999991e+01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Conv2D/1",
+ "family_index": 2,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Conv2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 106,
+ "real_time": 6.5605414433861675e+00,
+ "cpu_time": 6.5604865094339591e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Corr2D_Constant_Padding/1",
+ "family_index": 3,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Corr2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 145,
+ "real_time": 4.8053482226256667e+00,
+ "cpu_time": 4.8052233655172412e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Filter2D_Constant_Padding/1",
+ "family_index": 4,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Filter2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 77,
+ "real_time": 8.9911930263042450e+00,
+ "cpu_time": 8.9911225194805233e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "family_index": 5,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4701,
+ "real_time": 1.4843348191028402e-01,
+ "cpu_time": 1.4843221718783239e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Resize2D_Bilinear_Interpolation/1",
+ "family_index": 6,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2581,
+ "real_time": 2.7286108469187126e-01,
+ "cpu_time": 2.7285397791553656e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "family_index": 7,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 100977,
+ "real_time": 6.9243114111851491e-03,
+ "cpu_time": 6.9242313695197901e-03,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Resize2D_Bilinear_Interpolation/1",
+ "family_index": 8,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Resize2D_Bilinear_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 47816,
+ "real_time": 1.4641848158278912e-02,
+ "cpu_time": 1.4641626714907138e-02,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Erosion2D_Constant_Padding/1",
+ "family_index": 9,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Erosion2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2338,
+ "real_time": 3.2029897643777427e-01,
+ "cpu_time": 3.2029361420017116e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Dilation2D_Constant_Padding/1",
+ "family_index": 10,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Dilation2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2051,
+ "real_time": 3.0370511982802473e-01,
+ "cpu_time": 3.0369885811799063e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Opening2D_Constant_Padding/1",
+ "family_index": 11,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Opening2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1078,
+ "real_time": 5.8860488664462529e-01,
+ "cpu_time": 5.8859645361780988e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Closing2D_Constant_Padding/1",
+ "family_index": 12,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Closing2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1041,
+ "real_time": 5.0554771645497409e-01,
+ "cpu_time": 5.0554403746397636e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_TopHat2D_Constant_Padding/1",
+ "family_index": 13,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_TopHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 676,
+ "real_time": 9.8800656424295263e-01,
+ "cpu_time": 9.8798956065088772e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_BottomHat2D_Constant_Padding/1",
+ "family_index": 14,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_BottomHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 681,
+ "real_time": 9.9229560440157305e-01,
+ "cpu_time": 9.9226728928046803e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Erode2D_Constant_Padding/1",
+ "family_index": 15,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Erode2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4906,
+ "real_time": 1.4288953546840125e-01,
+ "cpu_time": 1.4288746534855290e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Opening2D_Constant_Padding/1",
+ "family_index": 16,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Opening2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2934,
+ "real_time": 2.3779297538289587e-01,
+ "cpu_time": 2.3778759066121333e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Closing2D_Constant_Padding/1",
+ "family_index": 17,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Closing2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2930,
+ "real_time": 2.3896080591165977e-01,
+ "cpu_time": 2.3895895563139949e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_TopHat2D_Constant_Padding/1",
+ "family_index": 18,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_TopHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2536,
+ "real_time": 2.7688780405825997e-01,
+ "cpu_time": 2.7688168966877036e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_BottomHat2D_Constant_Padding/1",
+ "family_index": 19,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_BottomHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2520,
+ "real_time": 2.7749545369592926e-01,
+ "cpu_time": 2.7749185238095248e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_MorphGrad2D_Constant_Padding/1",
+ "family_index": 20,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2580,
+ "real_time": 2.7239541126083033e-01,
+ "cpu_time": 2.7239121627906959e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Dilate2D_Constant_Padding/1",
+ "family_index": 21,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Dilate2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4911,
+ "real_time": 1.4269597732809600e-01,
+ "cpu_time": 1.4269491427407904e-01,
+ "time_unit": "ms"
+ }
+ ]
+}
diff --git a/test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log b/test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log
new file mode 100644
index 00000000..d80f7666
--- /dev/null
+++ b/test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log
@@ -0,0 +1,56 @@
+2025-09-07T14:22:08+00:00
+Running ./bin/image-processing-benchmark
+Run on (24 X 5100 MHz CPU s)
+CPU Caches:
+ L1 Data 48 KiB (x12)
+ L1 Instruction 32 KiB (x12)
+ L2 Unified 1280 KiB (x12)
+ L3 Unified 30720 KiB (x1)
+Load Average: 3.12, 3.98, 5.00
+***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
+--------------------------------------------------------------------------------------------
+Benchmark Time CPU Iterations
+--------------------------------------------------------------------------------------------
+Eigen_Convolve2D/1 22.6 ms 22.6 ms 31
+MLIR_Conv2D/1 69.5 ms 69.5 ms 10
+Buddy_Conv2D/1 6.56 ms 6.56 ms 106
+Buddy_Corr2D_Constant_Padding/1 4.81 ms 4.81 ms 145
+OpenCV_Filter2D_Constant_Padding/1 8.99 ms 8.99 ms 77
+Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.148 ms 0.148 ms 4701
+Buddy_Resize2D_Bilinear_Interpolation/1 0.273 ms 0.273 ms 2581
+OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 100977
+OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47816
+Buddy_Erosion2D_Constant_Padding/1 0.320 ms 0.320 ms 2338
+Buddy_Dilation2D_Constant_Padding/1 0.304 ms 0.304 ms 2051
+Buddy_Opening2D_Constant_Padding/1 0.589 ms 0.589 ms 1078
+Buddy_Closing2D_Constant_Padding/1 0.506 ms 0.506 ms 1041
+Buddy_TopHat2D_Constant_Padding/1 0.988 ms 0.988 ms 676
+Buddy_BottomHat2D_Constant_Padding/1 0.992 ms 0.992 ms 681
+OpenCV_Erode2D_Constant_Padding/1 0.143 ms 0.143 ms 4906
+OpenCV_Opening2D_Constant_Padding/1 0.238 ms 0.238 ms 2934
+OpenCV_Closing2D_Constant_Padding/1 0.239 ms 0.239 ms 2930
+OpenCV_TopHat2D_Constant_Padding/1 0.277 ms 0.277 ms 2536
+OpenCV_BottomHat2D_Constant_Padding/1 0.277 ms 0.277 ms 2520
+OpenCV_MorphGrad2D_Constant_Padding/1 0.272 ms 0.272 ms 2580
+OpenCV_Dilate2D_Constant_Padding/1 0.143 ms 0.143 ms 4911
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
diff --git a/test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json
new file mode 100644
index 00000000..c44dc8c1
--- /dev/null
+++ b/test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json
@@ -0,0 +1,348 @@
+{
+ "context": {
+ "date": "2025-09-07T14:22:31+00:00",
+ "host_name": "4ed4bacfe45d",
+ "executable": "./bin/image-processing-benchmark",
+ "num_cpus": 24,
+ "mhz_per_cpu": 5100,
+ "cpu_scaling_enabled": true,
+ "caches": [
+ {
+ "type": "Data",
+ "level": 1,
+ "size": 49152,
+ "num_sharing": 2
+ },
+ {
+ "type": "Instruction",
+ "level": 1,
+ "size": 32768,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 2,
+ "size": 1310720,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 3,
+ "size": 31457280,
+ "num_sharing": 24
+ }
+ ],
+ "load_avg": [3.0791,3.89893,4.94873],
+ "library_build_type": "release"
+ },
+ "benchmarks": [
+ {
+ "name": "Eigen_Convolve2D/1",
+ "family_index": 0,
+ "per_family_instance_index": 0,
+ "run_name": "Eigen_Convolve2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 19,
+ "real_time": 3.6331885739376673e+01,
+ "cpu_time": 3.6330795842105260e+01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "MLIR_Conv2D/1",
+ "family_index": 1,
+ "per_family_instance_index": 0,
+ "run_name": "MLIR_Conv2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 6,
+ "real_time": 1.2376248215635617e+02,
+ "cpu_time": 1.2375778916666665e+02,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Conv2D/1",
+ "family_index": 2,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Conv2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 61,
+ "real_time": 1.1424725783652947e+01,
+ "cpu_time": 1.1424135098360654e+01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Corr2D_Constant_Padding/1",
+ "family_index": 3,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Corr2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 86,
+ "real_time": 8.1076199232145800e+00,
+ "cpu_time": 8.1065335465116259e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Filter2D_Constant_Padding/1",
+ "family_index": 4,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Filter2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 116,
+ "real_time": 6.0680480673909187e+00,
+ "cpu_time": 6.0677083189655177e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "family_index": 5,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4691,
+ "real_time": 1.4930519832651948e-01,
+ "cpu_time": 1.4926992773395867e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Resize2D_Bilinear_Interpolation/1",
+ "family_index": 6,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2573,
+ "real_time": 2.7582971932959383e-01,
+ "cpu_time": 2.7580277924601621e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "family_index": 7,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 100634,
+ "real_time": 6.9968619756626393e-03,
+ "cpu_time": 6.9965185225669300e-03,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Resize2D_Bilinear_Interpolation/1",
+ "family_index": 8,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Resize2D_Bilinear_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 43490,
+ "real_time": 1.4810458127178799e-02,
+ "cpu_time": 1.4809136491147392e-02,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Erosion2D_Constant_Padding/1",
+ "family_index": 9,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Erosion2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2224,
+ "real_time": 3.1000987975020633e-01,
+ "cpu_time": 3.0994578911870491e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Dilation2D_Constant_Padding/1",
+ "family_index": 10,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Dilation2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2187,
+ "real_time": 3.0433967416560098e-01,
+ "cpu_time": 3.0430470416095112e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Opening2D_Constant_Padding/1",
+ "family_index": 11,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Opening2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1079,
+ "real_time": 5.5283736834601194e-01,
+ "cpu_time": 5.5278239110287286e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Closing2D_Constant_Padding/1",
+ "family_index": 12,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Closing2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1084,
+ "real_time": 4.6356722740009704e-01,
+ "cpu_time": 4.6354099538745458e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_TopHat2D_Constant_Padding/1",
+ "family_index": 13,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_TopHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 671,
+ "real_time": 9.9508046837393116e-01,
+ "cpu_time": 9.9501271982116168e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_BottomHat2D_Constant_Padding/1",
+ "family_index": 14,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_BottomHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 694,
+ "real_time": 9.6488717717980099e-01,
+ "cpu_time": 9.6476614553314033e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Erode2D_Constant_Padding/1",
+ "family_index": 15,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Erode2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4872,
+ "real_time": 1.4356119720317265e-01,
+ "cpu_time": 1.4355267775041070e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Opening2D_Constant_Padding/1",
+ "family_index": 16,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Opening2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 3052,
+ "real_time": 2.3091042616928234e-01,
+ "cpu_time": 2.3088996100917433e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Closing2D_Constant_Padding/1",
+ "family_index": 17,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Closing2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 3055,
+ "real_time": 2.2933373499033688e-01,
+ "cpu_time": 2.2932497119476267e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_TopHat2D_Constant_Padding/1",
+ "family_index": 18,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_TopHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2601,
+ "real_time": 2.6950421458434620e-01,
+ "cpu_time": 2.6948042560553648e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_BottomHat2D_Constant_Padding/1",
+ "family_index": 19,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_BottomHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2655,
+ "real_time": 2.6802309769470589e-01,
+ "cpu_time": 2.6800724218455829e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_MorphGrad2D_Constant_Padding/1",
+ "family_index": 20,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2674,
+ "real_time": 2.6259915979834036e-01,
+ "cpu_time": 2.6257767726252695e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Dilate2D_Constant_Padding/1",
+ "family_index": 21,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Dilate2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 5002,
+ "real_time": 1.4069610041101091e-01,
+ "cpu_time": 1.4068704078368685e-01,
+ "time_unit": "ms"
+ }
+ ]
+}
diff --git a/test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log b/test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log
new file mode 100644
index 00000000..9d3be0fe
--- /dev/null
+++ b/test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log
@@ -0,0 +1,56 @@
+2025-09-07T14:22:31+00:00
+Running ./bin/image-processing-benchmark
+Run on (24 X 5100 MHz CPU s)
+CPU Caches:
+ L1 Data 48 KiB (x12)
+ L1 Instruction 32 KiB (x12)
+ L2 Unified 1280 KiB (x12)
+ L3 Unified 30720 KiB (x1)
+Load Average: 3.08, 3.90, 4.95
+***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
+--------------------------------------------------------------------------------------------
+Benchmark Time CPU Iterations
+--------------------------------------------------------------------------------------------
+Eigen_Convolve2D/1 36.3 ms 36.3 ms 19
+MLIR_Conv2D/1 124 ms 124 ms 6
+Buddy_Conv2D/1 11.4 ms 11.4 ms 61
+Buddy_Corr2D_Constant_Padding/1 8.11 ms 8.11 ms 86
+OpenCV_Filter2D_Constant_Padding/1 6.07 ms 6.07 ms 116
+Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.149 ms 0.149 ms 4691
+Buddy_Resize2D_Bilinear_Interpolation/1 0.276 ms 0.276 ms 2573
+OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 100634
+OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 43490
+Buddy_Erosion2D_Constant_Padding/1 0.310 ms 0.310 ms 2224
+Buddy_Dilation2D_Constant_Padding/1 0.304 ms 0.304 ms 2187
+Buddy_Opening2D_Constant_Padding/1 0.553 ms 0.553 ms 1079
+Buddy_Closing2D_Constant_Padding/1 0.464 ms 0.464 ms 1084
+Buddy_TopHat2D_Constant_Padding/1 0.995 ms 0.995 ms 671
+Buddy_BottomHat2D_Constant_Padding/1 0.965 ms 0.965 ms 694
+OpenCV_Erode2D_Constant_Padding/1 0.144 ms 0.144 ms 4872
+OpenCV_Opening2D_Constant_Padding/1 0.231 ms 0.231 ms 3052
+OpenCV_Closing2D_Constant_Padding/1 0.229 ms 0.229 ms 3055
+OpenCV_TopHat2D_Constant_Padding/1 0.270 ms 0.269 ms 2601
+OpenCV_BottomHat2D_Constant_Padding/1 0.268 ms 0.268 ms 2655
+OpenCV_MorphGrad2D_Constant_Padding/1 0.263 ms 0.263 ms 2674
+OpenCV_Dilate2D_Constant_Padding/1 0.141 ms 0.141 ms 5002
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
diff --git a/test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json
new file mode 100644
index 00000000..84797d22
--- /dev/null
+++ b/test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json
@@ -0,0 +1,348 @@
+{
+ "context": {
+ "date": "2025-09-07T14:22:55+00:00",
+ "host_name": "4ed4bacfe45d",
+ "executable": "./bin/image-processing-benchmark",
+ "num_cpus": 24,
+ "mhz_per_cpu": 5100,
+ "cpu_scaling_enabled": true,
+ "caches": [
+ {
+ "type": "Data",
+ "level": 1,
+ "size": 49152,
+ "num_sharing": 2
+ },
+ {
+ "type": "Instruction",
+ "level": 1,
+ "size": 32768,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 2,
+ "size": 1310720,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 3,
+ "size": 31457280,
+ "num_sharing": 24
+ }
+ ],
+ "load_avg": [3.05127,3.8252,4.89551],
+ "library_build_type": "release"
+ },
+ "benchmarks": [
+ {
+ "name": "Eigen_Convolve2D/1",
+ "family_index": 0,
+ "per_family_instance_index": 0,
+ "run_name": "Eigen_Convolve2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 19,
+ "real_time": 3.5804433257956255e+01,
+ "cpu_time": 3.5803924105263171e+01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "MLIR_Conv2D/1",
+ "family_index": 1,
+ "per_family_instance_index": 0,
+ "run_name": "MLIR_Conv2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 6,
+ "real_time": 1.2386105209589005e+02,
+ "cpu_time": 1.2385802349999996e+02,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Conv2D/1",
+ "family_index": 2,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Conv2D/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 62,
+ "real_time": 1.1243964154874124e+01,
+ "cpu_time": 1.1243781112903225e+01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Corr2D_Constant_Padding/1",
+ "family_index": 3,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Corr2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 86,
+ "real_time": 8.1609252320472585e+00,
+ "cpu_time": 8.1606621744186025e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Filter2D_Constant_Padding/1",
+ "family_index": 4,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Filter2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 115,
+ "real_time": 6.0534707230070364e+00,
+ "cpu_time": 6.0533277826086938e+00,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "family_index": 5,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4691,
+ "real_time": 1.4979387287060808e-01,
+ "cpu_time": 1.4979121743764645e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Resize2D_Bilinear_Interpolation/1",
+ "family_index": 6,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2560,
+ "real_time": 2.7500972501002252e-01,
+ "cpu_time": 2.7500316249999995e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "family_index": 7,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 100967,
+ "real_time": 6.9533759701305943e-03,
+ "cpu_time": 6.9528367090237403e-03,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Resize2D_Bilinear_Interpolation/1",
+ "family_index": 8,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Resize2D_Bilinear_Interpolation/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 47401,
+ "real_time": 1.4758331940115305e-02,
+ "cpu_time": 1.4757342018100885e-02,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Erosion2D_Constant_Padding/1",
+ "family_index": 9,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Erosion2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2397,
+ "real_time": 3.0549748811962507e-01,
+ "cpu_time": 3.0548149436796035e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Dilation2D_Constant_Padding/1",
+ "family_index": 10,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Dilation2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2198,
+ "real_time": 2.8026699375791697e-01,
+ "cpu_time": 2.8025402229299401e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Opening2D_Constant_Padding/1",
+ "family_index": 11,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Opening2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1071,
+ "real_time": 5.2040776743942285e-01,
+ "cpu_time": 5.2038497292250174e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_Closing2D_Constant_Padding/1",
+ "family_index": 12,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_Closing2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 1510,
+ "real_time": 3.8458729865929936e-01,
+ "cpu_time": 3.8456946887417259e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_TopHat2D_Constant_Padding/1",
+ "family_index": 13,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_TopHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 695,
+ "real_time": 9.8917052792988236e-01,
+ "cpu_time": 9.8913716402877794e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "Buddy_BottomHat2D_Constant_Padding/1",
+ "family_index": 14,
+ "per_family_instance_index": 0,
+ "run_name": "Buddy_BottomHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 684,
+ "real_time": 9.6726054815869578e-01,
+ "cpu_time": 9.6718889619882953e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Erode2D_Constant_Padding/1",
+ "family_index": 15,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Erode2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4878,
+ "real_time": 1.4255527756701927e-01,
+ "cpu_time": 1.4255166113161119e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Opening2D_Constant_Padding/1",
+ "family_index": 16,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Opening2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 3098,
+ "real_time": 2.2962313864483536e-01,
+ "cpu_time": 2.2960967947062669e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Closing2D_Constant_Padding/1",
+ "family_index": 17,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Closing2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 3043,
+ "real_time": 2.3499211144024237e-01,
+ "cpu_time": 2.3498555570160989e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_TopHat2D_Constant_Padding/1",
+ "family_index": 18,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_TopHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2619,
+ "real_time": 2.6389677824296764e-01,
+ "cpu_time": 2.6389314623902327e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_BottomHat2D_Constant_Padding/1",
+ "family_index": 19,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_BottomHat2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2682,
+ "real_time": 2.5938538102451142e-01,
+ "cpu_time": 2.5937977852349003e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_MorphGrad2D_Constant_Padding/1",
+ "family_index": 20,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 2671,
+ "real_time": 2.6242634423079897e-01,
+ "cpu_time": 2.6242075926619268e-01,
+ "time_unit": "ms"
+ },
+ {
+ "name": "OpenCV_Dilate2D_Constant_Padding/1",
+ "family_index": 21,
+ "per_family_instance_index": 0,
+ "run_name": "OpenCV_Dilate2D_Constant_Padding/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 4966,
+ "real_time": 1.4111212860947409e-01,
+ "cpu_time": 1.4111019734192456e-01,
+ "time_unit": "ms"
+ }
+ ]
+}
diff --git a/test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log b/test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log
new file mode 100644
index 00000000..cf470496
--- /dev/null
+++ b/test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log
@@ -0,0 +1,56 @@
+2025-09-07T14:22:55+00:00
+Running ./bin/image-processing-benchmark
+Run on (24 X 5100 MHz CPU s)
+CPU Caches:
+ L1 Data 48 KiB (x12)
+ L1 Instruction 32 KiB (x12)
+ L2 Unified 1280 KiB (x12)
+ L3 Unified 30720 KiB (x1)
+Load Average: 3.05, 3.83, 4.90
+***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
+--------------------------------------------------------------------------------------------
+Benchmark Time CPU Iterations
+--------------------------------------------------------------------------------------------
+Eigen_Convolve2D/1 35.8 ms 35.8 ms 19
+MLIR_Conv2D/1 124 ms 124 ms 6
+Buddy_Conv2D/1 11.2 ms 11.2 ms 62
+Buddy_Corr2D_Constant_Padding/1 8.16 ms 8.16 ms 86
+OpenCV_Filter2D_Constant_Padding/1 6.05 ms 6.05 ms 115
+Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.150 ms 0.150 ms 4691
+Buddy_Resize2D_Bilinear_Interpolation/1 0.275 ms 0.275 ms 2560
+OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 100967
+OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47401
+Buddy_Erosion2D_Constant_Padding/1 0.305 ms 0.305 ms 2397
+Buddy_Dilation2D_Constant_Padding/1 0.280 ms 0.280 ms 2198
+Buddy_Opening2D_Constant_Padding/1 0.520 ms 0.520 ms 1071
+Buddy_Closing2D_Constant_Padding/1 0.385 ms 0.385 ms 1510
+Buddy_TopHat2D_Constant_Padding/1 0.989 ms 0.989 ms 695
+Buddy_BottomHat2D_Constant_Padding/1 0.967 ms 0.967 ms 684
+OpenCV_Erode2D_Constant_Padding/1 0.143 ms 0.143 ms 4878
+OpenCV_Opening2D_Constant_Padding/1 0.230 ms 0.230 ms 3098
+OpenCV_Closing2D_Constant_Padding/1 0.235 ms 0.235 ms 3043
+OpenCV_TopHat2D_Constant_Padding/1 0.264 ms 0.264 ms 2619
+OpenCV_BottomHat2D_Constant_Padding/1 0.259 ms 0.259 ms 2682
+OpenCV_MorphGrad2D_Constant_Padding/1 0.262 ms 0.262 ms 2671
+OpenCV_Dilate2D_Constant_Padding/1 0.141 ms 0.141 ms 4966
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
+Saved PNG file.
diff --git a/test_result/imageprocessing/image-processing-result.log b/test_result/imageprocessing/image-processing-result.log
new file mode 100644
index 00000000..8b8610c9
--- /dev/null
+++ b/test_result/imageprocessing/image-processing-result.log
@@ -0,0 +1,129 @@
+Benchmark results - Sun Sep 7 14:19:23 UTC 2025
+Testing SSE support
+SSE is supported.
+Running image-processing-benchmark for SSE
+Running: ../benchmarks/ImageProcessing/Images/YuTu.png prewittKernelAlign random3x3KernelAlignInt CONSTANT_PADDING
+[Success] …
+Running: ../benchmarks/ImageProcessing/Images/YuTu.png prewittKernelAlign random3x3KernelAlignInt REPLICATE_PADDING
+[Success] …
+Running: ../benchmarks/ImageProcessing/Images/YuTu.png prewittKernelAlign random3x3KernelAlignInt REPLICATE_PADDING
+[Success] …
+Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel3x3KernelAlign random3x3KernelAlignInt CONSTANT_PADDING
+[Success] …
+Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel3x3KernelAlign random3x3KernelAlignInt CONSTANT_PADDING
+[Success] …
+Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel3x3KernelAlign random3x3KernelAlignInt REPLICATE_PADDING
+[Success] …
+Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel3x3KernelAlign random3x3KernelAlignInt REPLICATE_PADDING
+[Success] …
+Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel5x5KernelAlign random3x3KernelAlignInt CONSTANT_PADDING
+[Success] …
+Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel5x5KernelAlign random3x3KernelAlignInt CONSTANT_PADDING
+[Success] …
+Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel5x5KernelAlign random3x3KernelAlignInt REPLICATE_PADDING
+[Success] …
+Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel5x5KernelAlign random3x3KernelAlignInt REPLICATE_PADDING
+[Success] …
+Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel7x7KernelAlign random3x3KernelAlignInt CONSTANT_PADDING
+[Success] …
+Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel7x7KernelAlign random3x3KernelAlignInt CONSTANT_PADDING
+[Success] …
+Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel7x7KernelAlign random3x3KernelAlignInt REPLICATE_PADDING
+[Success] …
+Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel7x7KernelAlign random3x3KernelAlignInt REPLICATE_PADDING
+[Success] …
+Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel9x9KernelAlign random3x3KernelAlignInt CONSTANT_PADDING
+[Success] …
+Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel9x9KernelAlign random3x3KernelAlignInt CONSTANT_PADDING
+[Success] …
+Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel9x9KernelAlign random3x3KernelAlignInt REPLICATE_PADDING
+[Success] …
+Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel9x9KernelAlign random3x3KernelAlignInt REPLICATE_PADDING
+[Success] …
+Running: ../benchmarks/ImageProcessing/Images/YuTu.png laplacianKernelAlign random3x3KernelAlignInt CONSTANT_PADDING
+[Success] …
+Running: ../benchmarks/ImageProcessing/Images/YuTu.png laplacianKernelAlign random3x3KernelAlignInt CONSTANT_PADDING
+[Success] …
+Running: ../benchmarks/ImageProcessing/Images/YuTu.png laplacianKernelAlign random3x3KernelAlignInt REPLICATE_PADDING
+[Success] …
+Running: ../benchmarks/ImageProcessing/Images/YuTu.png laplacianKernelAlign random3x3KernelAlignInt REPLICATE_PADDING
+[Success] …
+Running: ../benchmarks/ImageProcessing/Images/YuTu.png logKernelAlign random3x3KernelAlignInt CONSTANT_PADDING
+[Success] …
+Running: ../benchmarks/ImageProcessing/Images/YuTu.png logKernelAlign random3x3KernelAlignInt CONSTANT_PADDING
+[Success] …
+Running: ../benchmarks/ImageProcessing/Images/YuTu.png logKernelAlign random3x3KernelAlignInt REPLICATE_PADDING
+[Success] …
+Running: ../benchmarks/ImageProcessing/Images/YuTu.png logKernelAlign random3x3KernelAlignInt REPLICATE_PADDING
+[Success] …
+Testing AVX2 support
+AVX2 is supported.
+[Success] …
+Testing AVX2 support
+AVX2 is supported.
+Running image-processing-benchmark for AVX2
+Running: ../benchmarks/ImageProcessing/Images/YuTu.png prewittKernelAlign random3x3KernelAlignInt CONSTANT_PADDING
+Running image-processing-benchmark for AVX2
+Running: ../benchmarks/ImageProcessing/Images/YuTu.png prewittKernelAlign random3x3KernelAlignInt CONSTANT_PADDING
+[Success] …
+Running: ../benchmarks/ImageProcessing/Images/YuTu.png prewittKernelAlign random3x3KernelAlignInt REPLICATE_PADDING
+[Success] …
+Running: ../benchmarks/ImageProcessing/Images/YuTu.png prewittKernelAlign random3x3KernelAlignInt REPLICATE_PADDING
+[Success] …
+Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel3x3KernelAlign random3x3KernelAlignInt CONSTANT_PADDING
+[Success] …
+Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel3x3KernelAlign random3x3KernelAlignInt CONSTANT_PADDING
+[Success] …
+Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel3x3KernelAlign random3x3KernelAlignInt REPLICATE_PADDING
+[Success] …
+Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel3x3KernelAlign random3x3KernelAlignInt REPLICATE_PADDING
+[Success] …
+Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel5x5KernelAlign random3x3KernelAlignInt CONSTANT_PADDING
+[Success] …
+Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel5x5KernelAlign random3x3KernelAlignInt CONSTANT_PADDING
+[Success] …
+Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel5x5KernelAlign random3x3KernelAlignInt REPLICATE_PADDING
+[Success] …
+Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel5x5KernelAlign random3x3KernelAlignInt REPLICATE_PADDING
+[Success] …
+Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel7x7KernelAlign random3x3KernelAlignInt CONSTANT_PADDING
+[Success] …
+Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel7x7KernelAlign random3x3KernelAlignInt CONSTANT_PADDING
+[Success] …
+Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel7x7KernelAlign random3x3KernelAlignInt REPLICATE_PADDING
+[Success] …
+Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel7x7KernelAlign random3x3KernelAlignInt REPLICATE_PADDING
+[Success] …
+Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel9x9KernelAlign random3x3KernelAlignInt CONSTANT_PADDING
+[Success] …
+Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel9x9KernelAlign random3x3KernelAlignInt CONSTANT_PADDING
+[Success] …
+Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel9x9KernelAlign random3x3KernelAlignInt REPLICATE_PADDING
+[Success] …
+Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel9x9KernelAlign random3x3KernelAlignInt REPLICATE_PADDING
+[Success] …
+Running: ../benchmarks/ImageProcessing/Images/YuTu.png laplacianKernelAlign random3x3KernelAlignInt CONSTANT_PADDING
+[Success] …
+Running: ../benchmarks/ImageProcessing/Images/YuTu.png laplacianKernelAlign random3x3KernelAlignInt CONSTANT_PADDING
+[Success] …
+Running: ../benchmarks/ImageProcessing/Images/YuTu.png laplacianKernelAlign random3x3KernelAlignInt REPLICATE_PADDING
+[Success] …
+Running: ../benchmarks/ImageProcessing/Images/YuTu.png laplacianKernelAlign random3x3KernelAlignInt REPLICATE_PADDING
+[Success] …
+Running: ../benchmarks/ImageProcessing/Images/YuTu.png logKernelAlign random3x3KernelAlignInt CONSTANT_PADDING
+[Success] …
+Running: ../benchmarks/ImageProcessing/Images/YuTu.png logKernelAlign random3x3KernelAlignInt CONSTANT_PADDING
+[Success] …
+Running: ../benchmarks/ImageProcessing/Images/YuTu.png logKernelAlign random3x3KernelAlignInt REPLICATE_PADDING
+[Success] …
+Running: ../benchmarks/ImageProcessing/Images/YuTu.png logKernelAlign random3x3KernelAlignInt REPLICATE_PADDING
+[Success] …
+Testing AVX512 support
+CPU does not support AVX512.
+Testing NEON support
+CPU does not support NEON.
+[Success] …
+Testing AVX512 support
+CPU does not support AVX512.
+Testing NEON support
+CPU does not support NEON.
diff --git a/test_result/vectorization/vectorization_matrix.json b/test_result/vectorization/vectorization_matrix.json
new file mode 100644
index 00000000..90867db0
--- /dev/null
+++ b/test_result/vectorization/vectorization_matrix.json
@@ -0,0 +1,68 @@
+{
+ "context": {
+ "date": "2025-09-07T14:30:43+00:00",
+ "host_name": "4ed4bacfe45d",
+ "executable": "./vectorization-matrix-benchmark",
+ "num_cpus": 24,
+ "mhz_per_cpu": 5100,
+ "cpu_scaling_enabled": true,
+ "caches": [
+ {
+ "type": "Data",
+ "level": 1,
+ "size": 49152,
+ "num_sharing": 2
+ },
+ {
+ "type": "Instruction",
+ "level": 1,
+ "size": 32768,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 2,
+ "size": 1310720,
+ "num_sharing": 2
+ },
+ {
+ "type": "Unified",
+ "level": 3,
+ "size": 31457280,
+ "num_sharing": 24
+ }
+ ],
+ "load_avg": [2.97803,3.27148,4.20654],
+ "library_build_type": "release"
+ },
+ "benchmarks": [
+ {
+ "name": "MLIR_MatMul/1",
+ "family_index": 0,
+ "per_family_instance_index": 0,
+ "run_name": "MLIR_MatMul/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 36434213,
+ "real_time": 1.9358822269403905e+01,
+ "cpu_time": 1.9357879145077188e+01,
+ "time_unit": "ns"
+ },
+ {
+ "name": "MLIR_MatVec/1",
+ "family_index": 1,
+ "per_family_instance_index": 0,
+ "run_name": "MLIR_MatVec/1",
+ "run_type": "iteration",
+ "repetitions": 1,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 34006039,
+ "real_time": 2.0755498137698094e+01,
+ "cpu_time": 2.0755264822227605e+01,
+ "time_unit": "ns"
+ }
+ ]
+}
diff --git a/test_result/vectorization/vectorization_matrix.log b/test_result/vectorization/vectorization_matrix.log
new file mode 100644
index 00000000..3fa79ef0
--- /dev/null
+++ b/test_result/vectorization/vectorization_matrix.log
@@ -0,0 +1,21 @@
+2025-09-07T14:30:43+00:00
+Running ./vectorization-matrix-benchmark
+Run on (24 X 5100 MHz CPU s)
+CPU Caches:
+ L1 Data 48 KiB (x12)
+ L1 Instruction 32 KiB (x12)
+ L2 Unified 1280 KiB (x12)
+ L3 Unified 30720 KiB (x1)
+Load Average: 2.98, 3.27, 4.21
+***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
+--------------------------------------------------------
+Benchmark Time CPU Iterations
+--------------------------------------------------------
+MLIR_MatMul/1 19.4 ns 19.4 ns 36434213
+MLIR_MatVec/1 20.8 ns 20.8 ns 34006039
+--------------------------------------------------------
+MLIR_MatMul: MLIR MatMul Operation + Nested Loop
+[ 18 18 18 18 18 18 18 18 18 18 ]
+--------------------------------------------------------
+MLIR_MatVec: MLIR MatVec Operation
+[ 18 18 18 18 18 18 18 18 18 18 ]
diff --git a/test_result/vectorization/vectorization_result.log b/test_result/vectorization/vectorization_result.log
new file mode 100644
index 00000000..350170f5
--- /dev/null
+++ b/test_result/vectorization/vectorization_result.log
@@ -0,0 +1,217 @@
+Vectorization Benchmark - Sun Sep 7 14:30:36 UTC 2025
+[Info] Starting vectorization-matrix-benchmark build...
+[Info] Running CMake configuration...
+-- Detecting CXX compiler ABI info - failed
+-- Check for working CXX compiler: /usr/bin/c++
+CMake Error: CMAKE_CXX_COMPILER not set, after EnableLanguage
+CMake Error at /usr/share/cmake-3.22/Modules/CMakeTestCXXCompiler.cmake:49 (try_compile):
+ Failed to configure test project build system.
+Call Stack (most recent call first):
+ CMakeLists.txt:11 (project)
+
+
+-- Configuring incomplete, errors occurred!
+See also "/home/buddy-complier-workspace/buddy-benchmark/build/CMakeFiles/CMakeOutput.log".
+See also "/home/buddy-complier-workspace/buddy-benchmark/build/CMakeFiles/CMakeError.log".
+[Info] Building vectorization-matrix-benchmark...
+ninja: error: loading 'build.ninja': No such file or directory
+-- The CXX compiler identification is GNU 11.4.0
+-- The C compiler identification is GNU 11.4.0
+-- Detecting CXX compiler ABI info
+-- Detecting CXX compiler ABI info - done
+-- Check for working CXX compiler: /usr/bin/c++ - skipped
+-- Detecting CXX compile features
+-- Detecting CXX compile features - done
+-- Detecting C compiler ABI info
+-- Detecting C compiler ABI info - done
+-- Check for working C compiler: /usr/bin/cc - skipped
+-- Detecting C compile features
+-- Detecting C compile features - done
+-- Configuring Target Architecture: avx512f
+-- Configuring Target Triple: x86_64-unknown-linux-gnu
+-- Configuring benchmarks: google
+-- Looking for pthread.h
+-- Looking for pthread.h - found
+-- Performing Test CMAKE_HAVE_LIBC_PTHREAD
+-- Performing Test CMAKE_HAVE_LIBC_PTHREAD - Success
+-- Found Threads: TRUE
+-- Performing Test HAVE_SSE
+-- Performing Test HAVE_SSE - Success
+-- SSE support - yes
+-- Performing Test HAVE_AVX2
+-- Performing Test HAVE_AVX2 - Success
+-- AVX2 support - yes
+-- Performing Test HAVE_AVX512
+-- Performing Test HAVE_AVX512 - Failed
+-- AVX512 support - no
+-- Performing Test HAVE_NEON
+-- Performing Test HAVE_NEON - Failed
+-- Arm Neon support - no
+-- Configuring done
+-- Generating done
+-- Build files have been written to: /home/buddy-complier-workspace/buddy-benchmark/build
+[Info] Building vectorization-matrix-benchmark...
+[1/17] Generating mlir-matmul.o
+[2/17] Generating mlir-matvec.o
+[3/17] Linking CXX static library benchmarks/Vectorization/libMLIRMatVec.a
+[4/17] Linking CXX static library benchmarks/Vectorization/libMLIRMatMul.a
+[5/17] Creating directories for 'project_googlebenchmark'
+[6/17] Performing download step (git clone) for 'project_googlebenchmark'
+Cloning into 'project_googlebenchmark'...
+HEAD is now at f91b6b4 bump version to 1.6 in preparation for release
+[7/17] No update step for 'project_googlebenchmark'
+[8/17] No patch step for 'project_googlebenchmark'
+[9/17] Performing configure step for 'project_googlebenchmark'
+-- The CXX compiler identification is GNU 11.4.0
+-- Detecting CXX compiler ABI info
+-- Detecting CXX compiler ABI info - done
+-- Check for working CXX compiler: /usr/bin/c++ - skipped
+-- Detecting CXX compile features
+-- Detecting CXX compile features - done
+-- Failed to find LLVM FileCheck
+-- Found Git: /usr/bin/git (found version "2.34.1")
+-- git version: v1.6.0 normalized to 1.6.0
+-- Version: 1.6.0
+-- Performing Test HAVE_CXX_FLAG_STD_CXX11
+-- Performing Test HAVE_CXX_FLAG_STD_CXX11 - Success
+-- Performing Test HAVE_CXX_FLAG_WALL
+-- Performing Test HAVE_CXX_FLAG_WALL - Success
+-- Performing Test HAVE_CXX_FLAG_WEXTRA
+-- Performing Test HAVE_CXX_FLAG_WEXTRA - Success
+-- Performing Test HAVE_CXX_FLAG_WSHADOW
+-- Performing Test HAVE_CXX_FLAG_WSHADOW - Success
+-- Performing Test HAVE_CXX_FLAG_WERROR
+-- Performing Test HAVE_CXX_FLAG_WERROR - Success
+-- Performing Test HAVE_CXX_FLAG_WSUGGEST_OVERRIDE
+-- Performing Test HAVE_CXX_FLAG_WSUGGEST_OVERRIDE - Success
+-- Performing Test HAVE_CXX_FLAG_PEDANTIC
+-- Performing Test HAVE_CXX_FLAG_PEDANTIC - Success
+-- Performing Test HAVE_CXX_FLAG_PEDANTIC_ERRORS
+-- Performing Test HAVE_CXX_FLAG_PEDANTIC_ERRORS - Success
+-- Performing Test HAVE_CXX_FLAG_WSHORTEN_64_TO_32
+-- Performing Test HAVE_CXX_FLAG_WSHORTEN_64_TO_32 - Failed
+-- Performing Test HAVE_CXX_FLAG_FSTRICT_ALIASING
+-- Performing Test HAVE_CXX_FLAG_FSTRICT_ALIASING - Success
+-- Performing Test HAVE_CXX_FLAG_WNO_DEPRECATED_DECLARATIONS
+-- Performing Test HAVE_CXX_FLAG_WNO_DEPRECATED_DECLARATIONS - Success
+-- Performing Test HAVE_CXX_FLAG_WNO_DEPRECATED
+-- Performing Test HAVE_CXX_FLAG_WNO_DEPRECATED - Success
+-- Performing Test HAVE_CXX_FLAG_WSTRICT_ALIASING
+-- Performing Test HAVE_CXX_FLAG_WSTRICT_ALIASING - Success
+-- Performing Test HAVE_CXX_FLAG_WD654
+-- Performing Test HAVE_CXX_FLAG_WD654 - Failed
+-- Performing Test HAVE_CXX_FLAG_WTHREAD_SAFETY
+-- Performing Test HAVE_CXX_FLAG_WTHREAD_SAFETY - Failed
+-- Performing Test HAVE_CXX_FLAG_COVERAGE
+-- Performing Test HAVE_CXX_FLAG_COVERAGE - Success
+-- Performing Test HAVE_STD_REGEX
+CMake Warning at cmake/CXXFeatureCheck.cmake:43 (message):
+ If you see build failures due to cross compilation, try setting
+ HAVE_STD_REGEX to 0
+Call Stack (most recent call first):
+ CMakeLists.txt:279 (cxx_feature_check)
+
+
+-- Performing Test HAVE_STD_REGEX -- success
+-- Performing Test HAVE_GNU_POSIX_REGEX
+-- Performing Test HAVE_GNU_POSIX_REGEX -- failed to compile
+-- Performing Test HAVE_POSIX_REGEX
+CMake Warning at cmake/CXXFeatureCheck.cmake:43 (message):
+ If you see build failures due to cross compilation, try setting
+ HAVE_POSIX_REGEX to 0
+Call Stack (most recent call first):
+ CMakeLists.txt:281 (cxx_feature_check)
+
+
+-- Performing Test HAVE_POSIX_REGEX -- success
+-- Performing Test HAVE_STEADY_CLOCK
+CMake Warning at cmake/CXXFeatureCheck.cmake:43 (message):
+ If you see build failures due to cross compilation, try setting
+ HAVE_STEADY_CLOCK to 0
+Call Stack (most recent call first):
+ CMakeLists.txt:290 (cxx_feature_check)
+
+
+-- Performing Test HAVE_STEADY_CLOCK -- success
+-- Looking for C++ include pthread.h
+-- Looking for C++ include pthread.h - found
+-- Performing Test CMAKE_HAVE_LIBC_PTHREAD
+-- Performing Test CMAKE_HAVE_LIBC_PTHREAD - Success
+-- Found Threads: TRUE
+-- Configuring done
+-- Generating done
+-- Build files have been written to: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/src/project_googlebenchmark-build
+[10/17] Performing build step for 'project_googlebenchmark'
+[1/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark_name.cc.o
+[2/22] Building CXX object src/CMakeFiles/benchmark.dir/sleep.cc.o
+[3/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark_api_internal.cc.o
+[4/22] Building CXX object src/CMakeFiles/benchmark_main.dir/benchmark_main.cc.o
+[5/22] Building CXX object src/CMakeFiles/benchmark.dir/colorprint.cc.o
+[6/22] Building CXX object src/CMakeFiles/benchmark.dir/timers.cc.o
+[7/22] Building CXX object src/CMakeFiles/benchmark.dir/perf_counters.cc.o
+[8/22] Building CXX object src/CMakeFiles/benchmark.dir/counter.cc.o
+[9/22] Building CXX object src/CMakeFiles/benchmark.dir/string_util.cc.o
+[10/22] Building CXX object src/CMakeFiles/benchmark.dir/commandlineflags.cc.o
+[11/22] Building CXX object src/CMakeFiles/benchmark.dir/reporter.cc.o
+[12/22] Building CXX object src/CMakeFiles/benchmark.dir/console_reporter.cc.o
+[13/22] Building CXX object src/CMakeFiles/benchmark.dir/csv_reporter.cc.o
+[14/22] Building CXX object src/CMakeFiles/benchmark.dir/json_reporter.cc.o
+[15/22] Building CXX object src/CMakeFiles/benchmark.dir/complexity.cc.o
+[16/22] Building CXX object src/CMakeFiles/benchmark.dir/sysinfo.cc.o
+[17/22] Building CXX object src/CMakeFiles/benchmark.dir/statistics.cc.o
+[18/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark_runner.cc.o
+[19/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark.cc.o
+[20/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark_register.cc.o
+[21/22] Linking CXX static library src/libbenchmark.a
+[22/22] Linking CXX static library src/libbenchmark_main.a
+[11/17] Performing install step for 'project_googlebenchmark'
+[0/1] Install the project...
+-- Install configuration: "Release"
+-- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/lib/libbenchmark.a
+-- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/lib/libbenchmark_main.a
+-- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/include/benchmark
+-- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/include/benchmark/benchmark.h
+-- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/lib/cmake/benchmark/benchmarkConfig.cmake
+-- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/lib/cmake/benchmark/benchmarkConfigVersion.cmake
+-- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/lib/pkgconfig/benchmark.pc
+-- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/lib/cmake/benchmark/benchmarkTargets.cmake
+-- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/lib/cmake/benchmark/benchmarkTargets-release.cmake
+-- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/share/doc/benchmark
+-- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/share/doc/benchmark/AssemblyTests.md
+-- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/share/doc/benchmark/_config.yml
+-- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/share/doc/benchmark/dependencies.md
+-- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/share/doc/benchmark/index.md
+-- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/share/doc/benchmark/perf_counters.md
+-- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/share/doc/benchmark/platform_specific_build_instructions.md
+-- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/share/doc/benchmark/random_interleaving.md
+-- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/share/doc/benchmark/releasing.md
+-- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/share/doc/benchmark/tools.md
+-- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/share/doc/benchmark/user_guide.md
+[12/17] No test step for 'project_googlebenchmark'
+[13/17] Completed 'project_googlebenchmark'
+[14/17] Building CXX object benchmarks/Vectorization/CMakeFiles/vectorization-matrix-benchmark.dir/Main.cpp.o
+[15/17] Building CXX object benchmarks/Vectorization/CMakeFiles/vectorization-matrix-benchmark.dir/MLIRMatMulBenchmark.cpp.o
+[16/17] Building CXX object benchmarks/Vectorization/CMakeFiles/vectorization-matrix-benchmark.dir/MLIRMatVecBenchmark.cpp.o
+[17/17] Linking CXX executable bin/vectorization-matrix-benchmark
+[Info] Running vectorization-matrix-benchmark...
+2025-09-07T14:30:43+00:00
+Running ./vectorization-matrix-benchmark
+Run on (24 X 5100 MHz CPU s)
+CPU Caches:
+ L1 Data 48 KiB (x12)
+ L1 Instruction 32 KiB (x12)
+ L2 Unified 1280 KiB (x12)
+ L3 Unified 30720 KiB (x1)
+Load Average: 2.98, 3.27, 4.21
+***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
+--------------------------------------------------------
+Benchmark Time CPU Iterations
+--------------------------------------------------------
+MLIR_MatMul/1 19.4 ns 19.4 ns 36434213
+MLIR_MatVec/1 20.8 ns 20.8 ns 34006039
+--------------------------------------------------------
+MLIR_MatMul: MLIR MatMul Operation + Nested Loop
+[ 18 18 18 18 18 18 18 18 18 18 ]
+--------------------------------------------------------
+MLIR_MatVec: MLIR MatVec Operation
+[ 18 18 18 18 18 18 18 18 18 18 ]
diff --git a/thirdparty/README.md b/thirdparty/README.md
old mode 100644
new mode 100755
diff --git a/utils/CMakeLists.txt b/utils/CMakeLists.txt
old mode 100644
new mode 100755
diff --git a/utils/plots/CMakeLists.txt b/utils/plots/CMakeLists.txt
old mode 100644
new mode 100755
diff --git a/utils/plots/Main.cpp b/utils/plots/Main.cpp
old mode 100644
new mode 100755
diff --git a/utils/plots/python/plot.py b/utils/plots/python/plot.py
old mode 100644
new mode 100755
diff --git a/utils/plots/python/plotools/.gitignore b/utils/plots/python/plotools/.gitignore
old mode 100644
new mode 100755
diff --git a/utils/plots/python/plotools/__init__.py b/utils/plots/python/plotools/__init__.py
old mode 100644
new mode 100755
diff --git a/utils/plots/python/plotools/compare.py b/utils/plots/python/plotools/compare.py
old mode 100644
new mode 100755
diff --git a/utils/plots/source_dir.h.in b/utils/plots/source_dir.h.in
old mode 100644
new mode 100755
diff --git a/validation/AudioProcessing/AudioValidationLib.cpp b/validation/AudioProcessing/AudioValidationLib.cpp
old mode 100644
new mode 100755
diff --git a/validation/AudioProcessing/CMakeLists.txt b/validation/AudioProcessing/CMakeLists.txt
old mode 100644
new mode 100755
diff --git a/validation/CMakeLists.txt b/validation/CMakeLists.txt
old mode 100644
new mode 100755
diff --git a/validation/Python/.gitignore b/validation/Python/.gitignore
old mode 100644
new mode 100755
diff --git a/validation/Python/__init__.py b/validation/Python/__init__.py
old mode 100644
new mode 100755
diff --git a/validation/Python/audio/__init__.py b/validation/Python/audio/__init__.py
old mode 100644
new mode 100755
diff --git a/validation/Python/audio/audio_file.py b/validation/Python/audio/audio_file.py
old mode 100644
new mode 100755
diff --git a/validation/Python/audio/audio_test.py b/validation/Python/audio/audio_test.py
old mode 100644
new mode 100755
diff --git a/validation/Python/audio/fir.py b/validation/Python/audio/fir.py
old mode 100644
new mode 100755
diff --git a/validation/Python/main.py b/validation/Python/main.py
old mode 100644
new mode 100755
diff --git a/validation/Python/requirements.txt b/validation/Python/requirements.txt
old mode 100644
new mode 100755
diff --git a/validation/Python/utils/__init__.py b/validation/Python/utils/__init__.py
old mode 100644
new mode 100755
diff --git a/validation/Python/utils/audio_format.py b/validation/Python/utils/audio_format.py
old mode 100644
new mode 100755
diff --git a/validation/Python/utils/lib_path.py b/validation/Python/utils/lib_path.py
old mode 100644
new mode 100755
diff --git a/validation/README.md b/validation/README.md
old mode 100644
new mode 100755