diff --git a/.clang-format b/.clang-format old mode 100644 new mode 100755 diff --git a/.gitattributes b/.gitattributes old mode 100644 new mode 100755 diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md old mode 100644 new mode 100755 diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md old mode 100644 new mode 100755 diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml new file mode 100755 index 00000000..20035f68 --- /dev/null +++ b/.github/workflows/bench.yml @@ -0,0 +1,169 @@ +# .github/workflows/bench.yml +name: Buddy-Benchmark CI + +on: + push: # fire on any branch + branches: ['**'] + pull_request: + workflow_dispatch: + inputs: + upstream_repo: + description: "Which repo changed (mlir|benchmark)" + required: false + upstream_sha: + description: "Upstream commit SHA for result folder" + required: false + +jobs: + bench: + runs-on: self-hosted + permissions: + contents: read + pages: write + id-token: write + + steps: + # ------------------------------------------------------------ + # 1) update the two local clones so they match the commit that + # triggered the run (fast-forward only for safety) + # ------------------------------------------------------------ + - name: Update local clones + run: | + set -e + for dir in buddy-benchmark buddy-mlir; do + cd /home/quliu/buddy-complier-workspace/$dir + git remote update + git pull --ff-only + done + + # ------------------------------------------------------------ + # 2) run everything in the Docker sandbox + # ------------------------------------------------------------ + - name: Build & run benchmarks + run: | + set -e + TEST_DIR=/home/quliu/buddy-complier-workspace/buddy-benchmark/test_result + if [ -d "$TEST_DIR" ] && [ "$(find "$TEST_DIR" -type f | wc -l)" -gt 0 ]; then + echo "[Skip] $TEST_DIR already has benchmark outputs; skipping build/run." + echo "[Info] File count: $(find "$TEST_DIR" -type f | wc -l)" + else + echo "[Run] No existing results detected; running benchmarks in Docker." + /home/quliu/buddy-complier-workspace/run_docker.sh + fi + # ------------------------------------------------------------ + # 2½) decide which date folder we’re about to publish + # ------------------------------------------------------------ + - name: Set BENCH_DATE and BENCH_DIR + run: | + bench_date=$(date +'%Y-%m-%d') + # Prefer a passed upstream SHA, otherwise use this workflow's SHA + run_sha="${{ github.event.inputs.upstream_sha || github.sha }}" + echo "BENCH_DATE=$bench_date" >> "$GITHUB_ENV" + echo "BENCH_DIR=$HOME/buddy-complier-workspace/buddy-benchmark/site/benchmarks/$bench_date/${run_sha}" >> "$GITHUB_ENV" + + # ------------------------------------------------------------ + # 3-5) (UNCHANGED) upload raw logs, convert to HTML, deploy Pages + # ------------------------------------------------------------ + - name: Upload raw logs as artifact + uses: actions/upload-artifact@v4 + with: + name: vectorization-logs-${{ github.sha }} + path: /home/quliu/buddy-complier-workspace/buddy-benchmark/test_result + retention-days: 30 + + - name: Build mini-site + working-directory: /home/quliu/buddy-complier-workspace/buddy-benchmark + run: | + rm -rf site + mkdir -p "${{ env.BENCH_DIR }}" + python3 scripts/logs2html.py test_result "${{ env.BENCH_DIR }}/" + + - name: Update benchmarks/latest redirect + working-directory: /home/quliu/buddy-complier-workspace/buddy-benchmark/site/benchmarks + run: | + set -e + # -------- pick the most recent dated folder (YYYY-MM-DD) ------------- + latest=$(ls -1d 20*/ | sort -r | head -n1 | tr -d '/') + echo "[Info] newest run is: $latest" + + # -------- rebuild the 'latest' folder with a meta-refresh ------------ + rm -rf latest + mkdir -p latest + printf '%s\n' "" > latest/index.html + echo "[Info] benchmarks/latest now points to ../${latest}/" + + # ------------------------------------------------------------ + # 4) make /benchmarks/ point to the most recent run as well + # ------------------------------------------------------------ + # (Removed) previous redirect-only index; replaced by full listing below + + - name: Upload site artifact + uses: actions/upload-pages-artifact@v3 + with: + path: /home/quliu/buddy-complier-workspace/buddy-benchmark/site + + - name: Build landing page for this run + run: | + run_root="${{ env.BENCH_DIR }}" + mkdir -p "$run_root" + + { + printf '%s\n' \ + '---' \ + 'layout: default' \ + 'title: Benchmark run' \ + 'nav_exclude: true' \ + '---' \ + '' \ + '

Benchmark results

' \ + '' \ + '' + } > "$run_root/index.html" + + + - name: Build top-level benchmarks index (list all runs) + working-directory: /home/quliu/buddy-complier-workspace/buddy-benchmark/site + run: | + set -e + out=benchmarks/index.html + mkdir -p benchmarks + { + printf '%s\n' \ + '---' \ + 'layout: default' \ + 'title: Benchmarks' \ + 'nav_exclude: true' \ + '---' \ + '

Benchmark runs

' \ + '

Select a date and commit:

' + + # List dates newest first + for d in $(ls -1d benchmarks/20*/ | sort -r); do + d=${d%/} + echo "

${d#benchmarks/}

" + echo "" + done + } > "$out" + + - name: Push benchmark results + uses: peaceiris/actions-gh-pages@v4 + with: + personal_token: ${{ secrets.BUDDY_SITE_PAT }} + external_repository: buddy-compiler/buddy-compiler.github.io + publish_dir: /home/quliu/buddy-complier-workspace/buddy-benchmark/site # <- root of generated site + publish_branch: master + keep_files: true # keep earlier runs + enable_jekyll: true diff --git a/.github/workflows/watch-upstream.yml b/.github/workflows/watch-upstream.yml new file mode 100644 index 00000000..49a8c56d --- /dev/null +++ b/.github/workflows/watch-upstream.yml @@ -0,0 +1,96 @@ +name: Watch upstream and trigger bench on change +on: + schedule: + - cron: "*/10 * * * *" # every 10 minutes + workflow_dispatch: + +permissions: + contents: write # to push state branch + actions: write # to dispatch workflows + +concurrency: + group: watch-upstream + cancel-in-progress: true + +jobs: + watch: + runs-on: self-hosted + steps: + - name: Checkout default branch + uses: actions/checkout@v4 + with: + ref: main # adjust if your default branch differs + fetch-depth: 0 + + - name: Create/switch to automation-state branch + run: | + set -e + git fetch origin automation-state || true + if git show-ref --verify --quiet refs/remotes/origin/automation-state; then + # Start from the remote state branch to avoid non-ff push + git checkout -B automation-state origin/automation-state + else + # First run: create a new state branch from current HEAD + git checkout -B automation-state + fi + + - name: Get upstream HEAD SHAs + id: head + run: | + set -e + MLIR_SHA=$(git ls-remote https://github.com/buddy-compiler/buddy-mlir.git refs/heads/main | awk '{print $1}') + BENCH_UP_SHA=$(git ls-remote https://github.com/buddy-compiler/buddy-benchmark.git refs/heads/main | awk '{print $1}') + echo "mlir=${MLIR_SHA}" >> $GITHUB_OUTPUT + echo "bench=${BENCH_UP_SHA}" >> $GITHUB_OUTPUT + + - name: Load previous SHAs + id: prev + run: | + echo "prev_mlir=$(cat mlir.sha 2>/dev/null || echo none)" >> $GITHUB_OUTPUT + echo "prev_bench=$(cat bench.sha 2>/dev/null || echo none)" >> $GITHUB_OUTPUT + + - name: Decide if changed + id: decide + run: | + changed=false + if [ "${{ steps.head.outputs.mlir }}" != "${{ steps.prev.outputs.prev_mlir }}" ] || \ + [ "${{ steps.head.outputs.bench }}" != "${{ steps.prev.outputs.prev_bench }}" ]; then + changed=true + fi + echo "changed=$changed" >> $GITHUB_OUTPUT + + - name: Determine which repo changed + if: steps.decide.outputs.changed == 'true' + id: which + run: | + if [ "${{ steps.head.outputs.mlir }}" != "${{ steps.prev.outputs.prev_mlir }}" ]; then + echo "repo=mlir" >> $GITHUB_OUTPUT + echo "sha=${{ steps.head.outputs.mlir }}" >> $GITHUB_OUTPUT + else + echo "repo=benchmark" >> $GITHUB_OUTPUT + echo "sha=${{ steps.head.outputs.bench }}" >> $GITHUB_OUTPUT + fi + + - name: Update state branch + if: steps.decide.outputs.changed == 'true' + run: | + set -e + printf "%s" "${{ steps.head.outputs.mlir }}" > mlir.sha + printf "%s" "${{ steps.head.outputs.bench }}" > bench.sha + git config user.name "github-actions[bot]" + git config user.email "41898282+github-actions[bot]@users.noreply.github.com" + git add mlir.sha bench.sha + git commit -m "state: mlir=${{ steps.head.outputs.mlir }} bench=${{ steps.head.outputs.bench }}" || echo "no changes" + # Re-sync and push with lease to avoid non-ff failures if remote advanced + git fetch origin automation-state || true + git rebase origin/automation-state || true + git push --force-with-lease=refs/heads/automation-state origin HEAD:automation-state + + - name: Dispatch bench.yml in this repo + if: steps.decide.outputs.changed == 'true' + run: | + curl -sS -X POST \ + -H "Accept: application/vnd.github+json" \ + -H "Authorization: Bearer ${{ secrets.GITHUB_TOKEN }}" \ + https://api.github.com/repos/${{ github.repository }}/actions/workflows/bench.yml/dispatches \ + -d '{"ref":"main","inputs":{"upstream_repo":"${{ steps.which.outputs.repo }}","upstream_sha":"${{ steps.which.outputs.sha }}"}}' diff --git a/.gitignore b/.gitignore old mode 100644 new mode 100755 index bb52eca8..ef5a5a62 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,14 @@ # vscode configurations /.vscode + +# Third-party checkouts and external repos +/thirdparty/ + +# Generated sites and results +/site/ +/test_result/ + +# Local Python/venv stuff +__pycache__/ +.venv/ diff --git a/.gitmodules b/.gitmodules old mode 100644 new mode 100755 index 2c8ef1d1..7585fff9 --- a/.gitmodules +++ b/.gitmodules @@ -1,12 +1,16 @@ [submodule "thirdparty/opencv"] path = thirdparty/opencv url = https://github.com/opencv/opencv.git + ignore = all [submodule "thirdparty/Halide"] path = thirdparty/Halide url = https://github.com/halide/Halide.git + ignore = all [submodule "thirdparty/eigen"] path = thirdparty/eigen url = https://gitlab.com/libeigen/eigen.git + ignore = all [submodule "thirdparty/kfr"] path = thirdparty/kfr url = https://github.com/kfrlib/kfr.git + ignore = all diff --git a/.style.yapf b/.style.yapf old mode 100644 new mode 100755 diff --git a/CMakeLists.txt b/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/LICENSE b/LICENSE old mode 100644 new mode 100755 diff --git a/README.md b/README.md old mode 100644 new mode 100755 diff --git a/benchmarks/AudioProcessing/Audios/NASA_Mars.wav b/benchmarks/AudioProcessing/Audios/NASA_Mars.wav old mode 100644 new mode 100755 diff --git a/benchmarks/AudioProcessing/CMakeLists.txt b/benchmarks/AudioProcessing/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/AudioProcessing/Operations/BiquadOp/CMakeLists.txt b/benchmarks/AudioProcessing/Operations/BiquadOp/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/AudioProcessing/Operations/BiquadOp/MLIRBiquad.mlir b/benchmarks/AudioProcessing/Operations/BiquadOp/MLIRBiquad.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/AudioProcessing/Operations/BiquadOp/Main.cpp b/benchmarks/AudioProcessing/Operations/BiquadOp/Main.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/AudioProcessing/Operations/BiquadOp/Utils.hpp b/benchmarks/AudioProcessing/Operations/BiquadOp/Utils.hpp old mode 100644 new mode 100755 diff --git a/benchmarks/AudioProcessing/Operations/CMakeLists.txt b/benchmarks/AudioProcessing/Operations/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/AudioProcessing/Operations/FFTOp/CMakeLists.txt b/benchmarks/AudioProcessing/Operations/FFTOp/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/AudioProcessing/Operations/FFTOp/KFRFft.cpp b/benchmarks/AudioProcessing/Operations/FFTOp/KFRFft.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/AudioProcessing/Operations/FFTOp/Main.cpp b/benchmarks/AudioProcessing/Operations/FFTOp/Main.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/AudioProcessing/Operations/FIROp/CMakeLists.txt b/benchmarks/AudioProcessing/Operations/FIROp/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/AudioProcessing/Operations/FIROp/MLIRFIR.mlir b/benchmarks/AudioProcessing/Operations/FIROp/MLIRFIR.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/AudioProcessing/Operations/FIROp/MLIRFIRTiledVectorization.mlir b/benchmarks/AudioProcessing/Operations/FIROp/MLIRFIRTiledVectorization.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/AudioProcessing/Operations/FIROp/MLIRFIRVectorization.mlir b/benchmarks/AudioProcessing/Operations/FIROp/MLIRFIRVectorization.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/AudioProcessing/Operations/FIROp/Main.cpp b/benchmarks/AudioProcessing/Operations/FIROp/Main.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/AudioProcessing/Operations/FIROp/Utils.hpp b/benchmarks/AudioProcessing/Operations/FIROp/Utils.hpp old mode 100644 new mode 100755 diff --git a/benchmarks/AudioProcessing/Operations/IIROp/CMakeLists.txt b/benchmarks/AudioProcessing/Operations/IIROp/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/AudioProcessing/Operations/IIROp/MLIRIIRScalar.mlir b/benchmarks/AudioProcessing/Operations/IIROp/MLIRIIRScalar.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/AudioProcessing/Operations/IIROp/MLIRIIRVectorization.mlir b/benchmarks/AudioProcessing/Operations/IIROp/MLIRIIRVectorization.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/AudioProcessing/Operations/IIROp/Main.cpp b/benchmarks/AudioProcessing/Operations/IIROp/Main.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/AudioProcessing/Operations/IIROp/Utils.hpp b/benchmarks/AudioProcessing/Operations/IIROp/Utils.hpp old mode 100644 new mode 100755 diff --git a/benchmarks/AudioProcessing/Operations/RFFTOp/CMakeLists.txt b/benchmarks/AudioProcessing/Operations/RFFTOp/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/AudioProcessing/Operations/RFFTOp/GoogleBenchmarkMain.cpp b/benchmarks/AudioProcessing/Operations/RFFTOp/GoogleBenchmarkMain.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/AudioProcessing/Operations/RFFTOp/RFFT.py b/benchmarks/AudioProcessing/Operations/RFFTOp/RFFT.py old mode 100644 new mode 100755 diff --git a/benchmarks/AudioProcessing/README.md b/benchmarks/AudioProcessing/README.md old mode 100644 new mode 100755 diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/CMakeLists.txt b/benchmarks/DeepLearning/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Layers/CMakeLists.txt b/benchmarks/DeepLearning/Layers/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Layers/FFN/.gitignore b/benchmarks/DeepLearning/Layers/FFN/.gitignore old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Layers/FFN/CMakeLists.txt b/benchmarks/DeepLearning/Layers/FFN/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Layers/FFN/GoogleBenchmarkMain.cpp b/benchmarks/DeepLearning/Layers/FFN/GoogleBenchmarkMain.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Layers/FFN/buddy_ffn_import.py b/benchmarks/DeepLearning/Layers/FFN/buddy_ffn_import.py old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Layers/RMSNorm/.gitignore b/benchmarks/DeepLearning/Layers/RMSNorm/.gitignore old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Layers/RMSNorm/CMakeLists.txt b/benchmarks/DeepLearning/Layers/RMSNorm/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Layers/RMSNorm/GoogleBenchmarkMain.cpp b/benchmarks/DeepLearning/Layers/RMSNorm/GoogleBenchmarkMain.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Layers/RMSNorm/buddy_rmsnorm_import.py b/benchmarks/DeepLearning/Layers/RMSNorm/buddy_rmsnorm_import.py old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Layers/SelfAttention/.gitignore b/benchmarks/DeepLearning/Layers/SelfAttention/.gitignore old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Layers/SelfAttention/CMakeLists.txt b/benchmarks/DeepLearning/Layers/SelfAttention/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Layers/SelfAttention/GoogleBenchmarkMain.cpp b/benchmarks/DeepLearning/Layers/SelfAttention/GoogleBenchmarkMain.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Layers/SelfAttention/buddy_selfattention_import.py b/benchmarks/DeepLearning/Layers/SelfAttention/buddy_selfattention_import.py old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Models/Bert/.gitignore b/benchmarks/DeepLearning/Models/Bert/.gitignore old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Models/Bert/CMakeLists.txt b/benchmarks/DeepLearning/Models/Bert/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Models/Bert/GoogleBenchmarkMain.cpp b/benchmarks/DeepLearning/Models/Bert/GoogleBenchmarkMain.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Models/Bert/buddy_bert_import.py b/benchmarks/DeepLearning/Models/Bert/buddy_bert_import.py old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Models/CMakeLists.txt b/benchmarks/DeepLearning/Models/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Models/LeNet/.gitignore b/benchmarks/DeepLearning/Models/LeNet/.gitignore old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Models/LeNet/CMakeLists.txt b/benchmarks/DeepLearning/Models/LeNet/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Models/LeNet/GoogleBenchmarkMain.cpp b/benchmarks/DeepLearning/Models/LeNet/GoogleBenchmarkMain.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Models/LeNet/buddy_lenet_import.py b/benchmarks/DeepLearning/Models/LeNet/buddy_lenet_import.py old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Models/LeNet/lenet_model.pth b/benchmarks/DeepLearning/Models/LeNet/lenet_model.pth old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Models/LeNet/model.py b/benchmarks/DeepLearning/Models/LeNet/model.py old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Models/MobileNet-V3/.gitignore b/benchmarks/DeepLearning/Models/MobileNet-V3/.gitignore old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Models/MobileNet-V3/CMakeLists.txt b/benchmarks/DeepLearning/Models/MobileNet-V3/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Models/MobileNet-V3/GoogleBenchmarkMain.cpp b/benchmarks/DeepLearning/Models/MobileNet-V3/GoogleBenchmarkMain.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Models/MobileNet-V3/buddy_mobilenetv3_import.py b/benchmarks/DeepLearning/Models/MobileNet-V3/buddy_mobilenetv3_import.py old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Models/Resnet18/.gitignore b/benchmarks/DeepLearning/Models/Resnet18/.gitignore old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Models/Resnet18/CMakeLists.txt b/benchmarks/DeepLearning/Models/Resnet18/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Models/Resnet18/GoogleBenchmarkMain.cpp b/benchmarks/DeepLearning/Models/Resnet18/GoogleBenchmarkMain.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Models/Resnet18/buddy_resnet18_import.py b/benchmarks/DeepLearning/Models/Resnet18/buddy_resnet18_import.py old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Models/TinyLlama/.gitignore b/benchmarks/DeepLearning/Models/TinyLlama/.gitignore old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Models/TinyLlama/CMakeLists.txt b/benchmarks/DeepLearning/Models/TinyLlama/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Models/TinyLlama/Main.cpp b/benchmarks/DeepLearning/Models/TinyLlama/Main.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Models/TinyLlama/Utils.hpp b/benchmarks/DeepLearning/Models/TinyLlama/Utils.hpp old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Models/TinyLlama/buddy_tinyllama_import.py b/benchmarks/DeepLearning/Models/TinyLlama/buddy_tinyllama_import.py old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Models/Whisper/.gitignore b/benchmarks/DeepLearning/Models/Whisper/.gitignore old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Models/Whisper/CMakeLists.txt b/benchmarks/DeepLearning/Models/Whisper/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Models/Whisper/GoogleBenchmarkMain.cpp b/benchmarks/DeepLearning/Models/Whisper/GoogleBenchmarkMain.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Models/Whisper/buddy_whisper_import.py b/benchmarks/DeepLearning/Models/Whisper/buddy_whisper_import.py old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/ArithAddfOp/ArithAddf.mlir b/benchmarks/DeepLearning/Ops/ArithAddfOp/ArithAddf.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/ArithAddfOp/CMakeLists.txt b/benchmarks/DeepLearning/Ops/ArithAddfOp/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/ArithAddfOp/GoogleBenchmarkMain.cpp b/benchmarks/DeepLearning/Ops/ArithAddfOp/GoogleBenchmarkMain.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/ArithDivfOp/ArithDivf.mlir b/benchmarks/DeepLearning/Ops/ArithDivfOp/ArithDivf.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/ArithDivfOp/CMakeLists.txt b/benchmarks/DeepLearning/Ops/ArithDivfOp/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/ArithDivfOp/GoogleBenchmarkMain.cpp b/benchmarks/DeepLearning/Ops/ArithDivfOp/GoogleBenchmarkMain.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/ArithMulfOp/ArithMulf.mlir b/benchmarks/DeepLearning/Ops/ArithMulfOp/ArithMulf.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/ArithMulfOp/CMakeLists.txt b/benchmarks/DeepLearning/Ops/ArithMulfOp/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/ArithMulfOp/GoogleBenchmarkMain.cpp b/benchmarks/DeepLearning/Ops/ArithMulfOp/GoogleBenchmarkMain.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/ArithNegfOp/ArithNegf.mlir b/benchmarks/DeepLearning/Ops/ArithNegfOp/ArithNegf.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/ArithNegfOp/CMakeLists.txt b/benchmarks/DeepLearning/Ops/ArithNegfOp/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/ArithNegfOp/GoogleBenchmarkMain.cpp b/benchmarks/DeepLearning/Ops/ArithNegfOp/GoogleBenchmarkMain.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/ArithSubfOp/ArithSubf.mlir b/benchmarks/DeepLearning/Ops/ArithSubfOp/ArithSubf.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/ArithSubfOp/CMakeLists.txt b/benchmarks/DeepLearning/Ops/ArithSubfOp/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/ArithSubfOp/GoogleBenchmarkMain.cpp b/benchmarks/DeepLearning/Ops/ArithSubfOp/GoogleBenchmarkMain.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/BatchMatMulOp/BatchMatMul.mlir b/benchmarks/DeepLearning/Ops/BatchMatMulOp/BatchMatMul.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/BatchMatMulOp/BatchMatMulBroadcast.mlir b/benchmarks/DeepLearning/Ops/BatchMatMulOp/BatchMatMulBroadcast.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/BatchMatMulOp/BatchMatMulSCF.mlir b/benchmarks/DeepLearning/Ops/BatchMatMulOp/BatchMatMulSCF.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/BatchMatMulOp/BatchMatMulVec.mlir b/benchmarks/DeepLearning/Ops/BatchMatMulOp/BatchMatMulVec.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/BatchMatMulOp/BatchMatMulVecTile.mlir b/benchmarks/DeepLearning/Ops/BatchMatMulOp/BatchMatMulVecTile.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/BatchMatMulOp/CMakeLists.txt b/benchmarks/DeepLearning/Ops/BatchMatMulOp/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/BatchMatMulOp/Main.cpp b/benchmarks/DeepLearning/Ops/BatchMatMulOp/Main.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/BatchMatMulOp/Utils.hpp b/benchmarks/DeepLearning/Ops/BatchMatMulOp/Utils.hpp old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/CMakeLists.txt b/benchmarks/DeepLearning/Ops/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/Conv2DNchwFchwOp/CMakeLists.txt b/benchmarks/DeepLearning/Ops/Conv2DNchwFchwOp/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/Conv2DNchwFchwOp/Conv2DNchwFchw.mlir b/benchmarks/DeepLearning/Ops/Conv2DNchwFchwOp/Conv2DNchwFchw.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/Conv2DNchwFchwOp/GoogleBenchmarkMain.cpp b/benchmarks/DeepLearning/Ops/Conv2DNchwFchwOp/GoogleBenchmarkMain.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/Conv2DNchwFchwOp/conv2d-nchw-fchw-im2col.mlir b/benchmarks/DeepLearning/Ops/Conv2DNchwFchwOp/conv2d-nchw-fchw-im2col.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcInt32Op/CMakeLists.txt b/benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcInt32Op/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcInt32Op/Conv2DNhwcFhwc.mlir b/benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcInt32Op/Conv2DNhwcFhwc.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcInt32Op/Conv2DNhwcFhwcVec.mlir b/benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcInt32Op/Conv2DNhwcFhwcVec.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcInt32Op/Conv2DNhwcFhwcVecRVV.mlir b/benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcInt32Op/Conv2DNhwcFhwcVecRVV.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcInt32Op/Main.cpp b/benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcInt32Op/Main.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcInt32Op/Utils.hpp b/benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcInt32Op/Utils.hpp old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcOp/CMakeLists.txt b/benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcOp/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcOp/Conv2DNhwcFhwc.mlir b/benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcOp/Conv2DNhwcFhwc.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcOp/Conv2DNhwcFhwcVec.mlir b/benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcOp/Conv2DNhwcFhwcVec.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcOp/Conv2DNhwcFhwcVecTile.mlir b/benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcOp/Conv2DNhwcFhwcVecTile.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcOp/Main.cpp b/benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcOp/Main.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcOp/Utils.hpp b/benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcOp/Utils.hpp old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/Conv2DNhwcHwcfOp/CMakeLists.txt b/benchmarks/DeepLearning/Ops/Conv2DNhwcHwcfOp/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/Conv2DNhwcHwcfOp/Conv2DNhwcHwcf.mlir b/benchmarks/DeepLearning/Ops/Conv2DNhwcHwcfOp/Conv2DNhwcHwcf.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/Conv2DNhwcHwcfOp/GoogleBenchmarkMain.cpp b/benchmarks/DeepLearning/Ops/Conv2DNhwcHwcfOp/GoogleBenchmarkMain.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/DepthwiseConv2DNhwcHwcOp/CMakeLists.txt b/benchmarks/DeepLearning/Ops/DepthwiseConv2DNhwcHwcOp/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/DepthwiseConv2DNhwcHwcOp/DepthwiseConv2DNhwcHwc.mlir b/benchmarks/DeepLearning/Ops/DepthwiseConv2DNhwcHwcOp/DepthwiseConv2DNhwcHwc.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/DepthwiseConv2DNhwcHwcOp/DepthwiseConv2DNhwcHwcVec.mlir b/benchmarks/DeepLearning/Ops/DepthwiseConv2DNhwcHwcOp/DepthwiseConv2DNhwcHwcVec.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/DepthwiseConv2DNhwcHwcOp/Main.cpp b/benchmarks/DeepLearning/Ops/DepthwiseConv2DNhwcHwcOp/Main.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/DepthwiseConv2DNhwcHwcOp/Utils.hpp b/benchmarks/DeepLearning/Ops/DepthwiseConv2DNhwcHwcOp/Utils.hpp old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/MatMulOp/CMakeLists.txt b/benchmarks/DeepLearning/Ops/MatMulOp/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/MatMulOp/Main.cpp b/benchmarks/DeepLearning/Ops/MatMulOp/Main.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/MatMulOp/Utils.hpp b/benchmarks/DeepLearning/Ops/MatMulOp/Utils.hpp old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/MatMulOp/matmul.mlir b/benchmarks/DeepLearning/Ops/MatMulOp/matmul.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/MatMulTransposeBOp/CMakeLists.txt b/benchmarks/DeepLearning/Ops/MatMulTransposeBOp/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/MatMulTransposeBOp/Main.cpp b/benchmarks/DeepLearning/Ops/MatMulTransposeBOp/Main.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/MatMulTransposeBOp/MatMulTransposeB.mlir b/benchmarks/DeepLearning/Ops/MatMulTransposeBOp/MatMulTransposeB.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/MatMulTransposeBOp/Utils.hpp b/benchmarks/DeepLearning/Ops/MatMulTransposeBOp/Utils.hpp old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/MathExpOp/CMakeLists.txt b/benchmarks/DeepLearning/Ops/MathExpOp/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/MathExpOp/GoogleBenchmarkMain.cpp b/benchmarks/DeepLearning/Ops/MathExpOp/GoogleBenchmarkMain.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/MathExpOp/MathExp.mlir b/benchmarks/DeepLearning/Ops/MathExpOp/MathExp.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/MathFpowOp/CMakeLists.txt b/benchmarks/DeepLearning/Ops/MathFpowOp/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/MathFpowOp/GoogleBenchmarkMain.cpp b/benchmarks/DeepLearning/Ops/MathFpowOp/GoogleBenchmarkMain.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/MathFpowOp/MathFpow.mlir b/benchmarks/DeepLearning/Ops/MathFpowOp/MathFpow.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/MathRsqrtOp/CMakeLists.txt b/benchmarks/DeepLearning/Ops/MathRsqrtOp/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/MathRsqrtOp/GoogleBenchmarkMain.cpp b/benchmarks/DeepLearning/Ops/MathRsqrtOp/GoogleBenchmarkMain.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/MathRsqrtOp/MathRsqrt.mlir b/benchmarks/DeepLearning/Ops/MathRsqrtOp/MathRsqrt.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/PoolingNhwcSumOp/CMakeLists.txt b/benchmarks/DeepLearning/Ops/PoolingNhwcSumOp/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/PoolingNhwcSumOp/GoogleBenchmarkMain.cpp b/benchmarks/DeepLearning/Ops/PoolingNhwcSumOp/GoogleBenchmarkMain.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/PoolingNhwcSumOp/PoolingNhwcSum.mlir b/benchmarks/DeepLearning/Ops/PoolingNhwcSumOp/PoolingNhwcSum.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/ReduceAddfOp/CMakeLists.txt b/benchmarks/DeepLearning/Ops/ReduceAddfOp/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/ReduceAddfOp/GoogleBenchmarkMain.cpp b/benchmarks/DeepLearning/Ops/ReduceAddfOp/GoogleBenchmarkMain.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/ReduceAddfOp/ReduceAddf.mlir b/benchmarks/DeepLearning/Ops/ReduceAddfOp/ReduceAddf.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/ReduceMaxfOp/CMakeLists.txt b/benchmarks/DeepLearning/Ops/ReduceMaxfOp/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/ReduceMaxfOp/GoogleBenchmarkMain.cpp b/benchmarks/DeepLearning/Ops/ReduceMaxfOp/GoogleBenchmarkMain.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/ReduceMaxfOp/ReduceMaxf.mlir b/benchmarks/DeepLearning/Ops/ReduceMaxfOp/ReduceMaxf.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/SoftmaxExpSumDivOp/CMakeLists.txt b/benchmarks/DeepLearning/Ops/SoftmaxExpSumDivOp/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/SoftmaxExpSumDivOp/GoogleBenchmarkMain.cpp b/benchmarks/DeepLearning/Ops/SoftmaxExpSumDivOp/GoogleBenchmarkMain.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/SoftmaxExpSumDivOp/SoftmaxExpSumDiv.mlir b/benchmarks/DeepLearning/Ops/SoftmaxExpSumDivOp/SoftmaxExpSumDiv.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/TransposeOp/CMakeLists.txt b/benchmarks/DeepLearning/Ops/TransposeOp/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/TransposeOp/Main.cpp b/benchmarks/DeepLearning/Ops/TransposeOp/Main.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/TransposeOp/Transpose2D.mlir b/benchmarks/DeepLearning/Ops/TransposeOp/Transpose2D.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/TransposeOp/Utils.hpp b/benchmarks/DeepLearning/Ops/TransposeOp/Utils.hpp old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/README.md b/benchmarks/DeepLearning/README.md old mode 100644 new mode 100755 diff --git a/benchmarks/Gemmini/CMakeLists.txt b/benchmarks/Gemmini/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/Gemmini/Ops/CMakeLists.txt b/benchmarks/Gemmini/Ops/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/Gemmini/Ops/MatMulOp/CMakeLists.txt b/benchmarks/Gemmini/Ops/MatMulOp/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c b/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c old mode 100644 new mode 100755 diff --git a/benchmarks/Gemmini/Ops/MatMulOp/ExoUtils.h b/benchmarks/Gemmini/Ops/MatMulOp/ExoUtils.h old mode 100644 new mode 100755 diff --git a/benchmarks/Gemmini/Ops/MatMulOp/Main.cpp b/benchmarks/Gemmini/Ops/MatMulOp/Main.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Gemmini/Ops/MatMulOp/matmul.mlir b/benchmarks/Gemmini/Ops/MatMulOp/matmul.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Gemmini/README.md b/benchmarks/Gemmini/README.md old mode 100644 new mode 100755 diff --git a/benchmarks/Gemmini/ResNet-101/.gitattributes b/benchmarks/Gemmini/ResNet-101/.gitattributes old mode 100644 new mode 100755 diff --git a/benchmarks/Gemmini/ResNet-101/CMakeLists.txt b/benchmarks/Gemmini/ResNet-101/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/Gemmini/ResNet-101/CRunnerUtils.cpp b/benchmarks/Gemmini/ResNet-101/CRunnerUtils.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Gemmini/ResNet-101/Main.cpp b/benchmarks/Gemmini/ResNet-101/Main.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Gemmini/ResNet-101/ResNet101.mlir b/benchmarks/Gemmini/ResNet-101/ResNet101.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Gemmini/ResNet-101/images/Cat.h b/benchmarks/Gemmini/ResNet-101/images/Cat.h old mode 100644 new mode 100755 diff --git a/benchmarks/Gemmini/ResNet-101/images/Cat.jpg b/benchmarks/Gemmini/ResNet-101/images/Cat.jpg old mode 100644 new mode 100755 diff --git a/benchmarks/Gemmini/ResNet-101/include/Labels.h b/benchmarks/Gemmini/ResNet-101/include/Labels.h old mode 100644 new mode 100755 diff --git a/benchmarks/Gemmini/Utils.h b/benchmarks/Gemmini/Utils.h old mode 100644 new mode 100755 diff --git a/benchmarks/ImageProcessing/BuddyConv2DBenchmark.cpp b/benchmarks/ImageProcessing/BuddyConv2DBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/ImageProcessing/BuddyCorr2DBenchmark.cpp b/benchmarks/ImageProcessing/BuddyCorr2DBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/ImageProcessing/BuddyMorph2DBenchmark.cpp b/benchmarks/ImageProcessing/BuddyMorph2DBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/ImageProcessing/BuddyResize2DBenchmark.cpp b/benchmarks/ImageProcessing/BuddyResize2DBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/ImageProcessing/CMakeLists.txt b/benchmarks/ImageProcessing/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/ImageProcessing/EigenConvolve2DBenchmark.cpp b/benchmarks/ImageProcessing/EigenConvolve2DBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/ImageProcessing/Images/YuTu.png b/benchmarks/ImageProcessing/Images/YuTu.png old mode 100644 new mode 100755 diff --git a/benchmarks/ImageProcessing/Images/YuTu1022.png b/benchmarks/ImageProcessing/Images/YuTu1022.png old mode 100644 new mode 100755 diff --git a/benchmarks/ImageProcessing/Images/YuTu1024.png b/benchmarks/ImageProcessing/Images/YuTu1024.png old mode 100644 new mode 100755 diff --git a/benchmarks/ImageProcessing/Images/YuTu128.png b/benchmarks/ImageProcessing/Images/YuTu128.png old mode 100644 new mode 100755 diff --git a/benchmarks/ImageProcessing/Images/YuTu16.png b/benchmarks/ImageProcessing/Images/YuTu16.png old mode 100644 new mode 100755 diff --git a/benchmarks/ImageProcessing/Images/YuTu18.png b/benchmarks/ImageProcessing/Images/YuTu18.png old mode 100644 new mode 100755 diff --git a/benchmarks/ImageProcessing/Images/YuTu256.png b/benchmarks/ImageProcessing/Images/YuTu256.png old mode 100644 new mode 100755 diff --git a/benchmarks/ImageProcessing/Images/YuTu32.png b/benchmarks/ImageProcessing/Images/YuTu32.png old mode 100644 new mode 100755 diff --git a/benchmarks/ImageProcessing/Images/YuTu4.png b/benchmarks/ImageProcessing/Images/YuTu4.png old mode 100644 new mode 100755 diff --git a/benchmarks/ImageProcessing/Images/YuTu512.png b/benchmarks/ImageProcessing/Images/YuTu512.png old mode 100644 new mode 100755 diff --git a/benchmarks/ImageProcessing/Images/YuTu6.png b/benchmarks/ImageProcessing/Images/YuTu6.png old mode 100644 new mode 100755 diff --git a/benchmarks/ImageProcessing/Images/YuTu64.png b/benchmarks/ImageProcessing/Images/YuTu64.png old mode 100644 new mode 100755 diff --git a/benchmarks/ImageProcessing/Images/YuTu8.png b/benchmarks/ImageProcessing/Images/YuTu8.png old mode 100644 new mode 100755 diff --git a/benchmarks/ImageProcessing/MLIRConv2D.mlir b/benchmarks/ImageProcessing/MLIRConv2D.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/ImageProcessing/MLIRConv2DBenchmark.cpp b/benchmarks/ImageProcessing/MLIRConv2DBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/ImageProcessing/Main.cpp b/benchmarks/ImageProcessing/Main.cpp old mode 100644 new mode 100755 index 3f2f2eeb..81b68103 --- a/benchmarks/ImageProcessing/Main.cpp +++ b/benchmarks/ImageProcessing/Main.cpp @@ -80,18 +80,18 @@ void registerBenchmarkOpenCVResize2D(); // Run benchmarks. int main(int argc, char **argv) { - if (argc != 5) { - throw std::invalid_argument( - "Wrong format of command line arguments.\n" - "Correct format is ./image-processing-benchmark \n where " - "image path provides path of the image to be processed, kernel name " - "denotes the name " - "of desired kernel as specified in " - "kernelmorph denotes the kernel to be used for morphological operations" - "include/ImageProcessing/Kernels.h and Boundary options available " - "are CONSTANT_PADDING, REPLICATE_PADDING.\n"); - } + // if (argc != 5) { + // throw std::invalid_argument( + // "Wrong format of command line arguments.\n" + // "Correct format is ./image-processing-benchmark \n where " + // "image path provides path of the image to be processed, kernel name " + // "denotes the name " + // "of desired kernel as specified in " + // "kernelmorph denotes the kernel to be used for morphological operations" + // "include/ImageProcessing/Kernels.h and Boundary options available " + // "are CONSTANT_PADDING, REPLICATE_PADDING.\n"); + // } Img img = dip::imread(argv[1], dip::IMGRD_GRAYSCALE); diff --git a/benchmarks/ImageProcessing/OpenCVFilter2DBenchmark.cpp b/benchmarks/ImageProcessing/OpenCVFilter2DBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/ImageProcessing/OpenCVMorph2DBenchmark.cpp b/benchmarks/ImageProcessing/OpenCVMorph2DBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/ImageProcessing/OpenCVResize2DBenchmark.cpp b/benchmarks/ImageProcessing/OpenCVResize2DBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/ImageProcessing/include/Kernels.h b/benchmarks/ImageProcessing/include/Kernels.h old mode 100644 new mode 100755 diff --git a/benchmarks/OpOptimization/CMakeLists.txt b/benchmarks/OpOptimization/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/OpOptimization/Conv2dNchwFchw/CMakeLists.txt b/benchmarks/OpOptimization/Conv2dNchwFchw/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/OpOptimization/Conv2dNchwFchw/Conv2DNchwFchw.mlir b/benchmarks/OpOptimization/Conv2dNchwFchw/Conv2DNchwFchw.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/OpOptimization/Conv2dNchwFchw/Conv2DNchwFchwBenchmark.cpp b/benchmarks/OpOptimization/Conv2dNchwFchw/Conv2DNchwFchwBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/OpOptimization/Conv2dNchwFchw/Conv2DNchwFchwBroadcast.mlir b/benchmarks/OpOptimization/Conv2dNchwFchw/Conv2DNchwFchwBroadcast.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/OpOptimization/Conv2dNchwFchw/Conv2DNchwFchwIm2col.mlir b/benchmarks/OpOptimization/Conv2dNchwFchw/Conv2DNchwFchwIm2col.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/OpOptimization/Conv2dNchwFchw/Conv2DNchwFchwWinagrad.mlir b/benchmarks/OpOptimization/Conv2dNchwFchw/Conv2DNchwFchwWinagrad.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/OpOptimization/Conv2dNchwFchw/Main.cpp b/benchmarks/OpOptimization/Conv2dNchwFchw/Main.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/OpOptimization/MatMul/CMakeLists.txt b/benchmarks/OpOptimization/MatMul/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/OpOptimization/MatMul/Main.cpp b/benchmarks/OpOptimization/MatMul/Main.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/OpOptimization/MatMul/MatMul.mlir b/benchmarks/OpOptimization/MatMul/MatMul.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/OpOptimization/MatMul/MatMulBenchmark.cpp b/benchmarks/OpOptimization/MatMul/MatMulBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/OpOptimization/MatMul/MatMulBroadcast.mlir b/benchmarks/OpOptimization/MatMul/MatMulBroadcast.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/OpOptimization/MatMul/MatMulTransform.mlir b/benchmarks/OpOptimization/MatMul/MatMulTransform.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/OpOptimization/MatMul/TVM/.gitignore b/benchmarks/OpOptimization/MatMul/TVM/.gitignore old mode 100644 new mode 100755 diff --git a/benchmarks/OpOptimization/MatMul/TVM/main.py b/benchmarks/OpOptimization/MatMul/TVM/main.py old mode 100644 new mode 100755 diff --git a/benchmarks/OpOptimization/MatMul/TVM/matmul_autotvm.py b/benchmarks/OpOptimization/MatMul/TVM/matmul_autotvm.py old mode 100644 new mode 100755 diff --git a/benchmarks/OpOptimization/MatMul/TVM/matmul_manual.py b/benchmarks/OpOptimization/MatMul/TVM/matmul_manual.py old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/CMakeLists.txt b/benchmarks/Vectorization/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/MLIRMatMul.mlir b/benchmarks/Vectorization/MLIRMatMul.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/MLIRMatMulBenchmark.cpp b/benchmarks/Vectorization/MLIRMatMulBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/MLIRMatVec.mlir b/benchmarks/Vectorization/MLIRMatVec.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/MLIRMatVecBenchmark.cpp b/benchmarks/Vectorization/MLIRMatVecBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/Main.cpp b/benchmarks/Vectorization/Main.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/CMakeLists.txt b/benchmarks/Vectorization/gccloops/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx1.mlir b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx1.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx10a.mlir b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx10a.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx10aBenchmark.cpp b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx10aBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx10b.mlir b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx10b.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx10bBenchmark.cpp b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx10bBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx11.mlir b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx11.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx11Benchmark.cpp b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx11Benchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx12.mlir b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx12.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx12Benchmark.cpp b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx12Benchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx13.mlir b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx13.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx13Benchmark.cpp b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx13Benchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx14.mlir b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx14.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx14Benchmark.cpp b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx14Benchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx1Benchmark.cpp b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx1Benchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx21.mlir b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx21.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx21Benchmark.cpp b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx21Benchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx23.mlir b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx23.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx23Benchmark.cpp b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx23Benchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx24.mlir b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx24.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx24Benchmark.cpp b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx24Benchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx25.mlir b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx25.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx25Benchmark.cpp b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx25Benchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx2a.mlir b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx2a.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx2aBenchmark.cpp b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx2aBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx2b.mlir b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx2b.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx2bBenchmark.cpp b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx2bBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx3.mlir b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx3.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx3Benchmark.cpp b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx3Benchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx4a.mlir b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx4a.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx4aBenchmark.cpp b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx4aBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx4b.mlir b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx4b.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx4bBenchmark.cpp b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx4bBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx4c.mlir b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx4c.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx4cBenchmark.cpp b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx4cBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx7.mlir b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx7.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx7Benchmark.cpp b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx7Benchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx8.mlir b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx8.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx8Benchmark.cpp b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx8Benchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx9.mlir b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx9.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx9Benchmark.cpp b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx9Benchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/Main.cpp b/benchmarks/Vectorization/gccloops/Main.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/linpackc/CMakeLists.txt b/benchmarks/Vectorization/linpackc/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/linpackc/MLIRLinpackCDaxpyBenchmark.cpp b/benchmarks/Vectorization/linpackc/MLIRLinpackCDaxpyBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/linpackc/MLIRLinpackCDaxpyRollF32.mlir b/benchmarks/Vectorization/linpackc/MLIRLinpackCDaxpyRollF32.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/linpackc/MLIRLinpackCDaxpyRollF64.mlir b/benchmarks/Vectorization/linpackc/MLIRLinpackCDaxpyRollF64.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/linpackc/MLIRLinpackCDaxpyUnrollF32.mlir b/benchmarks/Vectorization/linpackc/MLIRLinpackCDaxpyUnrollF32.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/linpackc/MLIRLinpackCDaxpyUnrollF64.mlir b/benchmarks/Vectorization/linpackc/MLIRLinpackCDaxpyUnrollF64.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/linpackc/Main.cpp b/benchmarks/Vectorization/linpackc/Main.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/CMakeLists.txt b/benchmarks/Vectorization/polybench/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybench2mm.mlir b/benchmarks/Vectorization/polybench/MLIRPolybench2mm.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybench2mmBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybench2mmBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybench3mm.mlir b/benchmarks/Vectorization/polybench/MLIRPolybench3mm.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybench3mmBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybench3mmBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchAdi.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchAdi.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchAdiBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchAdiBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchAtax.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchAtax.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchAtaxBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchAtaxBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchBicg.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchBicg.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchBicgBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchBicgBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchCholesky.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchCholesky.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchCholeskyBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchCholeskyBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchCorrelation.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchCorrelation.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchCorrelationBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchCorrelationBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchCovariance.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchCovariance.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchCovarianceBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchCovarianceBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchDeriche.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchDeriche.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchDericheBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchDericheBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchDoitgen.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchDoitgen.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchDoitgenBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchDoitgenBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchDurbin.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchDurbin.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchDurbinBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchDurbinBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchFdtd2D.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchFdtd2D.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchFdtd2DBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchFdtd2DBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchFloydWarshall.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchFloydWarshall.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchFloydWarshallBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchFloydWarshallBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchGemm.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchGemm.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchGemmBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchGemmBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchGemver.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchGemver.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchGemverBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchGemverBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchGesummv.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchGesummv.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchGesummvBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchGesummvBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchGramschmidt.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchGramschmidt.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchGramschmidtBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchGramschmidtBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchHeat3D.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchHeat3D.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchHeat3DBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchHeat3DBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchJacobi1D.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchJacobi1D.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchJacobi1DBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchJacobi1DBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchJacobi2D.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchJacobi2D.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchJacobi2DBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchJacobi2DBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchLu.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchLu.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchLuBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchLuBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchLudcmp.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchLudcmp.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchLudcmpBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchLudcmpBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchMvt.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchMvt.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchMvtBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchMvtBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchNussinov.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchNussinov.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchNussinovBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchNussinovBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchSeidel2D.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchSeidel2D.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchSeidel2DBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchSeidel2DBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchSymm.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchSymm.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchSymmBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchSymmBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchSyr2k.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchSyr2k.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchSyr2kBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchSyr2kBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchSyrk.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchSyrk.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchSyrkBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchSyrkBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchTrisolv.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchTrisolv.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchTrisolvBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchTrisolvBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchTrmm.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchTrmm.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchTrmmBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchTrmmBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/Main.cpp b/benchmarks/Vectorization/polybench/Main.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/README.md b/benchmarks/Vectorization/polybench/README.md old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/Utils.hpp b/benchmarks/Vectorization/polybench/Utils.hpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/polybench_mlir_gen.py b/benchmarks/Vectorization/polybench/polybench_mlir_gen.py old mode 100644 new mode 100755 diff --git a/cmake/buddy-benchmark.cmake b/cmake/buddy-benchmark.cmake old mode 100644 new mode 100755 diff --git a/cmake/check-simd.cmake b/cmake/check-simd.cmake old mode 100644 new mode 100755 diff --git a/docs/ConvAlgorithms.md b/docs/ConvAlgorithms.md old mode 100644 new mode 100755 diff --git a/docs/DeepLearningBenchmark.md b/docs/DeepLearningBenchmark.md old mode 100644 new mode 100755 diff --git a/docs/GemminiConfig.md b/docs/GemminiConfig.md old mode 100644 new mode 100755 diff --git a/docs/Images/CoefficientsBroadcasting.png b/docs/Images/CoefficientsBroadcasting.png old mode 100644 new mode 100755 diff --git a/docs/PrepareRVOpenMP.md b/docs/PrepareRVOpenMP.md old mode 100644 new mode 100755 diff --git a/requirements.txt b/requirements.txt old mode 100644 new mode 100755 diff --git a/scripts/logs2html.py b/scripts/logs2html.py new file mode 100755 index 00000000..a567930b --- /dev/null +++ b/scripts/logs2html.py @@ -0,0 +1,100 @@ +#!/usr/bin/env python3 +""" +Turn every *.json under into /.html. +If a twin *.log exists (same stem), show it in a collapsible
. +If the JSON is unreadable, generate a red “FAILED” page instead of aborting. +""" + +import html, json, pathlib, datetime, sys, traceback + +class BrokenJSON(RuntimeError): + pass + +src, dst = map(pathlib.Path, sys.argv[1:3]) +dst.mkdir(parents=True, exist_ok=True) +stamp = datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S UTC") + +CSS = """ + +""" + +def gbench_json_to_table(js_path: pathlib.Path) -> str: + """Turn one Google-Benchmark JSON file into an HTML .""" + try: + payload = json.loads(js_path.read_text()) + except json.JSONDecodeError as e: + raise BrokenJSON(f"JSON parse error: {e.msg}") from e + + if "benchmarks" not in payload: + raise BrokenJSON("Missing top-level ‘benchmarks’ array") + + data = payload["benchmarks"] + if not data: + raise BrokenJSON("Empty ‘benchmarks’ array") + + first = next((b for b in data if b.get("run_type") == "iteration"), None) + if not first: + raise BrokenJSON("No ‘iteration’ rows found") + + unit = html.escape(first.get("time_unit", "ns")) + + head = (f"" + f"") + + rows = "\n".join( + f"" + f"" + f"" + f"" + for b in data + if b.get("run_type") == "iteration" + ) + return f"

{js_path.name}

\n
NameTime ({unit})CPU ({unit})Iterations
{html.escape(b['name'])}{b['real_time']:.3g}{b['cpu_time']:.3g}{b['iterations']:,}
{head}\n{rows}
" + +# --------------------------------------------------------------------------- + +for js in src.rglob("*.json"): + print("→ parsing", js) + log = js.with_suffix(".log") + rel = js.relative_to(src) + page = dst / rel.with_suffix(".html") + page.parent.mkdir(parents=True, exist_ok=True) + + body = [CSS, f"

{rel}

{stamp}

"] + + try: + body.append(gbench_json_to_table(js)) + except (BrokenJSON, json.JSONDecodeError) as err: + # Build a failure stub but keep the run going + body.append(f"
⚠ FAILED: " + f"{html.escape(str(err))}
") + + # Always embed the console log if available + if log.exists(): + body.append("
Console output\n" + f"
{html.escape(log.read_text())}
") + + page.write_text("\n".join(body)) + +# --------------------------------------------------------------------------- +# Build a simple index in the destination root (dst) +# --------------------------------------------------------------------------- +links = "\n".join( + f'
  • ' + f'{p.relative_to(dst).as_posix()}
  • ' + for p in sorted(dst.rglob("*.html")) + if p.name != "index.html" +) + +(dst / "index.html").write_text( + CSS + f"

    Buddy-Benchmark results

      \n{links}\n
    " +) diff --git a/scripts/run_docker.sh b/scripts/run_docker.sh new file mode 100755 index 00000000..a48e21c3 --- /dev/null +++ b/scripts/run_docker.sh @@ -0,0 +1,26 @@ +#!/usr/bin/env bash +set -e + +# ➊ one container per run, killed automatically on exit +CID=$(docker run -d --name buddy-mlir-ci-test \ + --privileged \ + -v "${GITHUB_WORKSPACE}:/home/buddy-complier-workspace" \ + liuqun1006/buddycompiler-base:python sleep infinity) + +trap "docker rm -f ${CID}" EXIT + +# ➋ execute the whole build-and-test sequence inside +docker exec "${CID}" bash -lc ' + set -e + cd /home/buddy-complier-workspace/buddy-mlir + ./test.sh build-llvm + ./test.sh build-buddy + ./test.sh run + + cd /home/buddy-complier-workspace/buddy-benchmark/test + ./test_script_vectorizationprocessing.sh +' + +# ➌ bring the logs back to the host (under ./test_result) +docker cp "${CID}":/home/buddy-complier-workspace/buddy-benchmark/test_result ./test_result + diff --git a/test/test_script_audioprocessing.sh b/test/test_script_audioprocessing.sh new file mode 100755 index 00000000..e16ae655 --- /dev/null +++ b/test/test_script_audioprocessing.sh @@ -0,0 +1,47 @@ +#!/usr/bin/env bash + +export BUDDY_MLIR_BUILD_DIR=/home/buddy-complier-workspace/buddy-mlir/build +export LLVM_MLIR_BUILD_DIR=/home/buddy-complier-workspace/buddy-mlir/llvm/build +cd /home/buddy-complier-workspace/buddy-benchmark +mkdir -p build && cd build +cmake -G Ninja .. \ + -DCMAKE_BUILD_TYPE=RELEASE \ + -DAUDIO_PROCESSING_BENCHMARKS=ON \ + -DCMAKE_CXX_COMPILER=${LLVM_MLIR_BUILD_DIR}/bin/clang++ \ + -DKFR_DIR=/home/buddy-complier-workspace/buddy-benchmark/thirdparty/kfr \ + -DBUDDY_MLIR_BUILD_DIR=${BUDDY_MLIR_BUILD_DIR} +ninja dap-op-iir-benchmark +cd bin +./dap-op-iir-benchmark + + + +cmake -G Ninja .. \ + -DCMAKE_BUILD_TYPE=RELEASE \ + -DAUDIO_PROCESSING_BENCHMARKS=ON \ + -DCMAKE_CXX_COMPILER=${LLVM_MLIR_BUILD_DIR}/bin/clang++ \ + -DKFR_DIR=/home/buddy-complier-workspace/buddy-benchmark/thirdparty/kfr \ + -DBUDDY_MLIR_BUILD_DIR=${BUDDY_MLIR_BUILD_DIR} \ + -DPYTHON_BINARY_DIR="$(dirname "$(which python3)")" + +ninja audio-plot +cd bin +./audio-plot ../../benchmarks/AudioProcessing/Audios/NASA_Mars.wav ResultKFRIir.wav +# " +# root@4f445bb41579:/home/buddy-complier-workspace/buddy-benchmark/build/bin# ./audio-plot ../../benchmarks/AudioProcessing/Audios/NASA_Mars.wav ResultKFRIir.wav +# Plotting now... +# Traceback (most recent call last): +# File "/home/buddy-complier-workspace/buddy-benchmark/utils/plots/python/plot.py", line 71, in +# compare_wave(args.file1, args.file2, part=args.part, +# File "/home/buddy-complier-workspace/buddy-benchmark/utils/plots/python/plotools/compare.py", line 120, in compare_wave +# after, time2 = get_time_domain(file2) +# File "/home/buddy-complier-workspace/buddy-benchmark/utils/plots/python/plotools/compare.py", line 60, in get_time_domain +# info, samples = get_info_and_samples(file) +# File "/home/buddy-complier-workspace/buddy-benchmark/utils/plots/python/plotools/compare.py", line 38, in get_info_and_samples +# with wave.open(file, 'rb') as audio: +# File "/usr/lib/python3.10/wave.py", line 509, in open +# return Wave_read(f) +# File "/usr/lib/python3.10/wave.py", line 159, in __init__ +# f = builtins.open(f, 'rb') +# FileNotFoundError: [Errno 2] No such file or directory: 'ResultKFRIir.wav' +# " \ No newline at end of file diff --git a/test/test_script_deeplearning.sh b/test/test_script_deeplearning.sh new file mode 100755 index 00000000..eef45e08 --- /dev/null +++ b/test/test_script_deeplearning.sh @@ -0,0 +1,232 @@ +#!/usr/bin/env bash + +################################################################################ +# 0. Script Setup +################################################################################ +# We disable "exit on error" so that if one benchmark fails to build or run, +# we can continue with the rest. +set +e + +################################################################################ +# 1. (Optional) Activate Python/Conda Environment +################################################################################ +# Uncomment or adjust if you use Anaconda/Miniconda: +# conda activate + + +################################################################################ +# 2. Build Each Benchmark (Continue Even If One Fails) +################################################################################ +BENCHMARK_TARGETS=( + # ------------------ + # Model-Level + # ------------------ + "dl-model-tinyllama-benchmark" + "dl-model-mobilenetv3-benchmark" + "dl-model-lenet-benchmark" + "dl-model-bert-benchmark" + "dl-model-whisper-benchmark" + "dl-model-resnet18-benchmark" + + # ------------------ + # Layer-Level + # ------------------ + "dl-layer-ffn-benchmark" + "dl-layer-selfattention-benchmark" + "dl-layer-rmsnorm-benchmark" + + # ------------------ + # Operation-Level + # ------------------ + "dl-op-linalg-matmul-benchmark" + "dl-op-linalg-conv2d-nchw-fchw-benchmark" + "dl-op-linalg-conv2d-nhwc-hwcf-benchmark" + "dl-op-linalg-conv2d-nhwc-fhwc-benchmark" + "dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark" + "dl-op-linalg-pooling-nhwc-sum-benchmark" + "dl-op-linalg-batch-matmul-benchmark" + "dl-op-linalg-arithaddf-benchmark" + "dl-op-linalg-arithdivf-benchmark" + "dl-op-linalg-arithmulf-benchmark" + "dl-op-linalg-arithnegf-benchmark" + "dl-op-linalg-arithsubf-benchmark" + "dl-op-linalg-mathfpow-benchmark" + "dl-op-linalg-mathrsqrt-benchmark" + "dl-op-linalg-mathexp-benchmark" + "dl-op-linalg-reduceaddf-benchmark" + "dl-op-linalg-reducemaxf-benchmark" + "dl-op-linalg-softmax-exp-sum-div-benchmark" + "dl-op-tosa-transpose-benchmark" + "dl-op-matmul-transpose-b-benchmark" +) + + +################################################################################ +# 3. Set Environment Variables for Buddy MLIR/LLVM +################################################################################ +# Adjust these paths according to your local setup: +BUDDY_MLIR_DIR="/home/buddy-complier-workspace/buddy-mlir" # The root directory of buddy-mlir +LLVM_BUILD_DIR="$BUDDY_MLIR_DIR/llvm/build" # The build dir for LLVM +BUDDY_BUILD_DIR="$BUDDY_MLIR_DIR/build" # The build dir for buddy-mlir + +# Export environment variables: +export BUDDY_MLIR_BUILD_DIR="$BUDDY_BUILD_DIR" +export LLVM_MLIR_BUILD_DIR="$LLVM_BUILD_DIR" +export PYTHONPATH="${LLVM_BUILD_DIR}/tools/mlir/python_packages/mlir_core:${BUDDY_BUILD_DIR}/python_packages:${PYTHONPATH}" +export BENCHMARK_PATH="${BUDDY_MLIR_DIR}/../buddy-benchmark" +echo "[Info] BUDDY_MLIR_BUILD_DIR = ${BUDDY_MLIR_BUILD_DIR}" +echo "[Info] LLVM_MLIR_BUILD_DIR = ${LLVM_MLIR_BUILD_DIR}" +echo "[Info] PYTHONPATH = ${PYTHONPATH}" + +################################################################################ +# 3. Prepare Build Folder and Run CMake +################################################################################ +cd "${BUDDY_MLIR_DIR}/../buddy-benchmark" || exit 1 +rm -rf build +mkdir -p build +cd build || exit 1 + +echo "[Info] Running CMake configuration..." +cmake -G Ninja .. \ + -DDEEP_LEARNING_BENCHMARKS=ON \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \ + -DBUDDY_MLIR_BUILD_DIR="${BUDDY_MLIR_BUILD_DIR}" \ + -DCMAKE_CXX_COMPILER="${LLVM_MLIR_BUILD_DIR}/bin/clang++" \ + -DCMAKE_C_COMPILER="${LLVM_MLIR_BUILD_DIR}/bin/clang" \ + -DCMAKE_CXX_FLAGS="-march=native" \ + -DCMAKE_C_FLAGS="-march=native" + + +################################################################################ +# 4. Prepare Build Folder and Run CMake +################################################################################ + +mkdir -p $BENCHMARK_PATH/test_result +mkdir -p $BENCHMARK_PATH/test_result/deeplearning +BUILD_LOG="${BENCHMARK_PATH}/test_result/deeplearning/build_results_summary.log" +> "${BUILD_LOG}" # Clear/create the file + +echo "[Info] Building all benchmarks with Ninja..." +for target in "${BENCHMARK_TARGETS[@]}"; do + echo "==> ninja ${target}" + if ninja "${target}"; then + echo "[Success] Build of '${target}'" | tee -a "${BUILD_LOG}" + else + echo "[Failed] Build of '${target}'" | tee -a "${BUILD_LOG}" + fi +done + +################################################################################ +# 5. Run Each Benchmark & Redirect Output (Continue Even If One Fails) +################################################################################ +cd bin || exit 1 + +RUN_LOG="${BENCHMARK_PATH}/test_result/deeplearning/run_results_summary.log" +> "${RUN_LOG}" # clear / create the file + +echo "[Info] Running all benchmarks in ./bin..." +for target in "${BENCHMARK_TARGETS[@]}"; do + if [[ -f "${target}" ]]; then + echo "==> Running ${target}" + + # ---- NEW: dump a machine-readable report next to the plain log ----------- + json_out="${BENCHMARK_PATH}/test_result/deeplearning/${target}.json" + + if "./${target}" \ + --benchmark_out="${json_out}" \ + --benchmark_out_format=json \ + > "${BENCHMARK_PATH}/test_result/deeplearning/${target}.log" 2>&1 + then + echo "[Success] Run of '${target}'" | tee -a "${RUN_LOG}" + echo " ↳ stdout/stderr → ${target}.log" | tee -a "${RUN_LOG}" + echo " ↳ gbench JSON → ${target}.json" | tee -a "${RUN_LOG}" + else + echo "[Failed] Run of '${target}'" | tee -a "${RUN_LOG}" + echo " ↳ stdout/stderr → ${target}.log (may contain errors)" | tee -a "${RUN_LOG}" + fi + # ------------------------------------------------------------------------- + else + echo "[Missing] Executable not found for '${target}'" | tee -a "${RUN_LOG}" + fi +done + + +################################################################################ +# 6. Set Environment Variables for Buddy MLIR/LLVM for cross-compile +################################################################################ +# Adjust these paths according to your local setup: +BUDDY_MLIR_DIR="/home/buddy-complier-workspace/buddy-mlir" # The root directory of buddy-mlir +LLVM_BUILD_DIR="$BUDDY_MLIR_DIR/llvm/build" # The build dir for LLVM +BUDDY_BUILD_DIR="$BUDDY_MLIR_DIR/build" # The build dir for buddy-mlir + +# Export environment variables: +export BUDDY_MLIR_BUILD_DIR="$BUDDY_BUILD_DIR" +export LLVM_MLIR_BUILD_DIR="$LLVM_BUILD_DIR" +export PYTHONPATH="${LLVM_BUILD_DIR}/tools/mlir/python_packages/mlir_core:${BUDDY_BUILD_DIR}/python_packages:${PYTHONPATH}" +export BUDDY_MLIR_BUILD_CROSS_DIR=${BUDDY_MLIR_BUILD_DIR}/../build +export RISCV_GNU_TOOLCHAIN=${BUDDY_MLIR_BUILD_DIR}/../thirdparty/riscv-gnu-toolchain +export RISCV_OMP_SHARED=${LLVM_MLIR_BUILD_DIR}/../build/lib/libomp.so +export BENCHMARK_PATH="${BUDDY_MLIR_DIR}/../buddy-benchmark" + +echo "[Info] BUDDY_MLIR_BUILD_DIR = ${BUDDY_MLIR_BUILD_DIR}" +echo "[Info] LLVM_MLIR_BUILD_DIR = ${LLVM_MLIR_BUILD_DIR}" +echo "[Info] PYTHONPATH = ${PYTHONPATH}" + +################################################################################ +# 7. Prepare Build Folder and Run CMake +################################################################################ +cd "${BUDDY_MLIR_DIR}/../buddy-benchmark" || exit 1 +mkdir -p build +cd build || exit 1 + +echo "[Info] Running CMake configuration..." +cmake -G Ninja .. \ + -DDEEP_LEARNING_BENCHMARKS=ON \ + -DCMAKE_BUILD_TYPE=RELEASE \ + -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \ + -DCROSS_COMPILE_RVV=ON \ + -DCMAKE_SYSTEM_NAME=Linux \ + -DCMAKE_SYSTEM_PROCESSOR=riscv \ + -DCMAKE_C_COMPILER=${LLVM_MLIR_BUILD_DIR}/bin/clang \ + -DRISCV_GNU_TOOLCHAIN=${RISCV_GNU_TOOLCHAIN} \ + -DCMAKE_CXX_COMPILER=${LLVM_MLIR_BUILD_DIR}/bin/clang++ \ + -DCMAKE_C_FLAGS="-march=rv64gcv --target=riscv64-unknown-linux-gnu --sysroot=${RISCV_GNU_TOOLCHAIN}/sysroot --gcc-toolchain=${RISCV_GNU_TOOLCHAIN} -fPIC" \ + -DCMAKE_CXX_FLAGS="-march=rv64gcv --target=riscv64-unknown-linux-gnu --sysroot=${RISCV_GNU_TOOLCHAIN}/sysroot --gcc-toolchain=${RISCV_GNU_TOOLCHAIN} -fPIC" \ + -DRISCV_OMP_SHARED=${RISCV_OMP_SHARED} \ + -DBUDDY_MLIR_BUILD_DIR=${BUDDY_MLIR_BUILD_DIR} \ + -DBUDDY_MLIR_BUILD_CROSS_DIR=${BUDDY_MLIR_BUILD_CROSS_DIR} \ + -DBUDDY_MLIR_CROSS_LIB_DIR=${BUDDY_MLIR_BUILD_CROSS_DIR}/lib + +################################################################################ +# 8. Prepare Build Folder and Run CMake for cross-compile +################################################################################ + +mkdir -p $BENCHMARK_PATH/test_result +BUILD_LOG="${BENCHMARK_PATH}/test_result/deeplearning/build_results_crosscompile_summary.log" +> "${BUILD_LOG}" # Clear/create the file + +echo "[Info] Building all benchmarks with Ninja..." +for target in "${BENCHMARK_TARGETS[@]}"; do + echo "==> ninja ${target}" + if ninja "${target}"; then + echo "[Success] Build of '${target}'" | tee -a "${BUILD_LOG}" + else + echo "[Failed] Build of '${target}'" | tee -a "${BUILD_LOG}" + fi +done + + +echo +echo "[Info] All build/run steps completed (script did not stop on failures)." +echo "[Info] Build summary: ${BUILD_LOG}" +echo "[Info] Run summary: ${RUN_LOG}" + + +cmake -G Ninja .. \ + -DMLIR_DIR=$PWD/../llvm/build/lib/cmake/mlir \ + -DLLVM_DIR=$PWD/../llvm/build/lib/cmake/llvm \ + -DLLVM_ENABLE_ASSERTIONS=ON \ + -DCMAKE_BUILD_TYPE=RELEASE \ + -DBUDDY_MLIR_ENABLE_PYTHON_PACKAGES=ON \ + -DPython3_EXECUTABLE=$(which python3) \ No newline at end of file diff --git a/test/test_script_geminiprocessing.sh b/test/test_script_geminiprocessing.sh new file mode 100755 index 00000000..b151cb5b --- /dev/null +++ b/test/test_script_geminiprocessing.sh @@ -0,0 +1,97 @@ +#!/usr/bin/env bash + +export BUDDY_MLIR_BUILD_DIR=/home/buddy-complier-workspace/buddy-mlir/build +export LLVM_MLIR_BUILD_DIR=/home/buddy-complier-workspace/buddy-mlir/llvm/build +export CHIPYARD_DIR=/home/buddy-complier-workspace/chipyard +export BUDDY_BENCHMARK_DIR=/home/buddy-complier-workspace/buddy-benchmark + +cd "${CHIPYARD_DIR}" +git config --global --add safe.directory /home/buddy-complier-workspace/chipyard +git checkout 1.8.1 + +# Initialize and update the 'generators/gemmini' submodule and any submodules inside it. +git config --global --add safe.directory /home/buddy-complier-workspace/chipyard/generators/gemmini +git submodule update --init --recursive generators/gemmini + +############################################# +# 1. Initialize Conda for the current shell +############################################# +conda init bash # or "conda init" if you’re already in a bash shell + +############################################# +# 2. Check if 'chipyard' environment exists +############################################# +if conda env list | grep -qE '^[^ ]*\s+chipyard\s'; then + echo "[INFO] Found existing 'chipyard' environment. Activating it." +else + echo "[INFO] 'chipyard' environment not found. Creating it..." + # Example creation command - adjust packages as needed + conda create -y -n chipyard python=3.10 \ + cmake ninja \ + # plus any other dependencies needed... +fi + +conda activate chipyard + +############################################# +# 3. Source build-setup and env.sh +############################################# +# If your script uses conda-lock or has pinned requirements, +# you might need to call build-setup.sh so it *creates* the +# .conda-env environment. But be sure it doesn’t conflict +# with your newly created 'chipyard' environment. +source build-setup.sh esp-tools +source env.sh + +############################################# +# 4. Proceed with your build +############################################# +cd "${BUDDY_BENCHMARK_DIR}" +rm -rf build +# Remove any existing build directory and create a fresh one. +mkdir -p build && cd build + +RESULT_DIR="${BUDDY_BENCHMARK_DIR}/test_result/geminiprocessing" +mkdir -p "${RESULT_DIR}" + +export C_PATH=$(which riscv64-unknown-linux-gnu-gcc) +export CXX_PATH=$(which riscv64-unknown-linux-gnu-g++) +export CLinker_PATH=$(which riscv64-unknown-linux-gnu-ld) + +# Print Address here +echo "[Info] C_COMPILER_PATH = ${C_PATH}" +echo "[Info] CXX_COMPILER_PATH = ${CXX_PATH}" +echo "[Info] C_LINKER_PATH = ${CLinker_PATH}" +echo "[Info] BUDDY_MLIR_BUILD_DIR = ${BUDDY_MLIR_BUILD_DIR}" +echo "[Info] LLVM_MLIR_BUILD_DIR = ${LLVM_MLIR_BUILD_DIR}" +echo "[Info] CHIPYARD_DIR = ${CHIPYARD_DIR}" +echo "[Info] BUDDY_BENCHMARK_DIR = ${BUDDY_BENCHMARK_DIR}" +echo "[Info] RESULT_DIR = ${RESULT_DIR}" + +echo "[Info] Running CMake configuration..." +cmake -G Ninja .. \ + -DCMAKE_C_COMPILER=${C_PATH} \ + -DCMAKE_CXX_COMPILER=${CXX_PATH} \ + -DCMAKE_LINKER=${CLinker_PATH} \ + -DCMAKE_BUILD_TYPE=RELEASE \ + -DBUDDY_MLIR_BUILD_DIR=${BUDDY_MLIR_BUILD_DIR} \ + -DGEMMINI_INCLUDE_DIR=${CHIPYARD_DIR}/generators/gemmini/software/gemmini-rocc-tests/include/ \ + -DGEMMINI_BENCHMARKS=ON \ + 2>&1 | tee "${RESULT_DIR}/cmake_configure.log" + +ninja 2>&1 | tee "${RESULT_DIR}/build.log" + +# ```[1/21] Creating directories for 'project_googlebenchmark' +# [2/21] Building C object benchmarks/Gemmini/Ops/MatMulOp/CMakeFiles/ExoMatMul.dir/ExoMatmul.c.o +# FAILED: benchmarks/Gemmini/Ops/MatMulOp/CMakeFiles/ExoMatMul.dir/ExoMatmul.c.o +# riscv64-unknown-linux-gnu-gcc -I/home/buddy-complier-workspace/buddy-mlir/build/cmake/../../frontend/Interfaces -I/home/buddy-complier-workspace/buddy-mlir/build/cmake/../../thirdparty/include -I/home/buddy-complier-workspace/buddy-benchmark/benchmarks -I/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include -I/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/.. -I/home/xychen/buddy-mlir/frontend/Interfaces -O3 -DNDEBUG -MD -MT benchmarks/Gemmini/Ops/MatMulOp/CMakeFiles/ExoMatMul.dir/ExoMatmul.c.o -MF benchmarks/Gemmini/Ops/MatMulOp/CMakeFiles/ExoMatMul.dir/ExoMatmul.c.o.d -o benchmarks/Gemmini/Ops/MatMulOp/CMakeFiles/ExoMatMul.dir/ExoMatmul.c.o -c /home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c +# /home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c: In function '_exo_matmul_4': +# /home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:28:47: error: macro "gemmini_extended_config_ex" requires 7 arguments, but only 6 given +# 28 | gemmini_extended_config_ex(WS, 0, 0, 1, 0, 0); +# | ^ +# In file included from /home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:23:``` + +# cd bin +# ./vectorization-matrix-benchmark 2>&1 | tee "${RESULT_DIR}/run.log" + +echo "[Info] CMake, build, and run logs are stored in ${RESULT_DIR}" diff --git a/test/test_script_imageprocessing.sh b/test/test_script_imageprocessing.sh new file mode 100755 index 00000000..0bdf9fa1 --- /dev/null +++ b/test/test_script_imageprocessing.sh @@ -0,0 +1,75 @@ +#!/usr/bin/env bash + +# NEW: Create results directory and update log file path +RESULT_DIR="${PWD}/test_result/imageprocessing" +mkdir -p "$RESULT_DIR" +LOG="${RESULT_DIR}/image-processing-result.log" +echo "Benchmark results - $(date)" > "$LOG" + +# Function to check CPU flag support +supports() { + local flag=$(echo "$1" | tr '[:upper:]' '[:lower:]') + if grep -qi "$flag" /proc/cpuinfo; then + return 0 + else + return 1 + fi +} + +features=("SSE" "AVX2" "AVX512" "NEON") +images=("../benchmarks/ImageProcessing/Images/YuTu.png") +kernels=("prewittKernelAlign" "sobel3x3KernelAlign" "sobel5x5KernelAlign" "sobel7x7KernelAlign" "sobel9x9KernelAlign" "laplacianKernelAlign" "logKernelAlign") +kernelmorphs=("random3x3KernelAlignInt") +boundaries=("CONSTANT_PADDING" "REPLICATE_PADDING") + +for feature in "${features[@]}"; do + echo "Testing $feature support" | tee -a "$LOG" + if supports "$feature"; then + echo "$feature is supported." | tee -a "$LOG" + mkdir -p build_${feature} && cd build_${feature} + cmake -G Ninja .. \ + -DCMAKE_BUILD_TYPE=RELEASE \ + -DIMAGE_PROCESSING_BENCHMARKS=ON \ + -DOpenCV_DIR=$PWD/../thirdparty/opencv/build/ \ + -DEIGEN_DIR=$PWD/../thirdparty/eigen/ \ + -DBUDDY_OPT_ATTR=$(echo "$feature" | tr '[:upper:]' '[:lower:]') \ + -DBUDDY_MLIR_BUILD_DIR=/home/buddy-complier-workspace/buddy-mlir/build + ninja image-processing-benchmark + echo "Running image-processing-benchmark for $feature" | tee -a "$LOG" + # --------------------------------------------------------------------------- + # inside the big loop – ONLY this section is changed + # --------------------------------------------------------------------------- + for img in "${images[@]}"; do + img_slug=$(basename "$img" .png) # YuTu → YuTu + for kern in "${kernels[@]}"; do + for morph in "${kernelmorphs[@]}"; do + for boundary in "${boundaries[@]}"; do + echo "Running: $img $kern $morph $boundary" | tee -a "$LOG" + + # ---- NEW: build a unique JSON filename --------------------------------- + slug="$(echo "${feature}_${img_slug}_${kern}_${morph}_${boundary}" \ + | tr ' /' '__')" + json_out="${RESULT_DIR}/${slug}.json" + log_out="${RESULT_DIR}/${slug}.log" + # ----------------------------------------------------------------------- + + ./bin/image-processing-benchmark \ + "$img" "$kern" "$morph" "$boundary" \ + --benchmark_out="$json_out" \ + --benchmark_out_format=json \ + > "$log_out" 2>&1 + echo "[Success] …" | tee -a "$LOG" + done + done + done + done + cd .. + else + echo "CPU does not support $feature." | tee -a "$LOG" + fi +done + +# NEW: Clean up build directories +for feature in "${features[@]}"; do + rm -rf "build_${feature}" +done \ No newline at end of file diff --git a/test/test_script_vectorizationprocessing.sh b/test/test_script_vectorizationprocessing.sh new file mode 100755 index 00000000..6bf34fbc --- /dev/null +++ b/test/test_script_vectorizationprocessing.sh @@ -0,0 +1,62 @@ +#!/usr/bin/env bash + +# apt update +# apt install -y libc6-riscv64-cross +# apt install -y \ +# libc6-riscv64-cross \ +# libstdc++6-riscv64-cross \ +# libgcc-s1-riscv64-cross +################################################################################ +# 1. Script Setup +################################################################################ +set -e +BUDDY_MLIR_BUILD_DIR="/home/buddy-complier-workspace/buddy-mlir/build" +LLVM_MLIR_BUILD_DIR="/home/buddy-complier-workspace/buddy-mlir/llvm/build" +# Export environment variables: +PYTHONPATH="${LLVM_MLIR_BUILD_DIR}/tools/mlir/python_packages/mlir_core:${BUDDY_MLIR_BUILD_DIR}/python_packages:${PYTHONPATH}" +BUDDY_MLIR_BUILD_CROSS_DIR=${BUDDY_MLIR_BUILD_DIR}/../build +RISCV_GNU_TOOLCHAIN=${BUDDY_MLIR_BUILD_DIR}/../thirdparty/riscv-gnu-toolchain +RISCV_OMP_SHARED=${LLVM_MLIR_BUILD_DIR}/../build/lib/libomp.so +BENCHMARK_PATH="${BUDDY_MLIR_DIR}/../buddy-benchmark" + +echo "[Info] BUDDY_MLIR_BUILD_DIR = ${BUDDY_MLIR_BUILD_DIR}" +echo "[Info] LLVM_MLIR_BUILD_DIR = ${LLVM_MLIR_BUILD_DIR}" + +RESULT_DIR="${PWD}/test_result/vectorization" +mkdir -p "${RESULT_DIR}" +LOG_FILE="${RESULT_DIR}/vectorization_result.log" +echo "Vectorization Benchmark - $(date)" > "${LOG_FILE}" + +################################################################################ +# 2. Build Benchmark +################################################################################ +cd /home/buddy-complier-workspace/buddy-benchmark +echo "[Info] Starting vectorization-matrix-benchmark build..." | tee -a "${LOG_FILE}" +rm -rf build +mkdir -p build && cd build +echo "[Info] Running CMake configuration..." | tee -a "${LOG_FILE}" +cmake -G Ninja .. \ + -DCMAKE_BUILD_TYPE=RELEASE \ + -DVECTORIZATION_BENCHMARKS=ON \ + -DBUDDY_MLIR_BUILD_DIR="${BUDDY_MLIR_BUILD_DIR}" 2>&1 | tee -a "${LOG_FILE}" + +echo "[Info] Building vectorization-matrix-benchmark..." | tee -a "${LOG_FILE}" +ninja vectorization-matrix-benchmark 2>&1 | tee -a "${LOG_FILE}" + +export QEMU_LD_PREFIX=/usr/riscv64-linux-gnu +################################################################################ +# 3. Run Benchmark +################################################################################ +cd bin +echo "[Info] Running vectorization-matrix-benchmark..." | tee -a "${LOG_FILE}" +json_out="${RESULT_DIR}/vectorization_matrix.json" +log_out="${RESULT_DIR}/vectorization_matrix.log" + +./vectorization-matrix-benchmark \ + --benchmark_out="$json_out" \ + --benchmark_out_format=json \ + > "$log_out" 2>&1 +tee -a "$LOG_FILE" < "$log_out" + + +echo "[Info] Benchmark completed. Log saved to ${LOG_FILE}" \ No newline at end of file diff --git a/test_result/deeplearning/build_results_crosscompile_summary.log b/test_result/deeplearning/build_results_crosscompile_summary.log new file mode 100644 index 00000000..ddd8a7cc --- /dev/null +++ b/test_result/deeplearning/build_results_crosscompile_summary.log @@ -0,0 +1,54 @@ +[Failed] Build of 'dl-model-tinyllama-benchmark' +[Failed] Build of 'dl-model-mobilenetv3-benchmark' +[Success] Build of 'dl-model-lenet-benchmark' +[Failed] Build of 'dl-model-bert-benchmark' +[Failed] Build of 'dl-model-whisper-benchmark' +[Failed] Build of 'dl-model-resnet18-benchmark' +[Success] Build of 'dl-layer-ffn-benchmark' +[Success] Build of 'dl-layer-selfattention-benchmark' +[Success] Build of 'dl-layer-rmsnorm-benchmark' +[Failed] Build of 'dl-op-linalg-matmul-benchmark' +[Success] Build of 'dl-op-linalg-conv2d-nchw-fchw-benchmark' +[Success] Build of 'dl-op-linalg-conv2d-nhwc-hwcf-benchmark' +[Success] Build of 'dl-op-linalg-conv2d-nhwc-fhwc-benchmark' +[Success] Build of 'dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark' +[Success] Build of 'dl-op-linalg-pooling-nhwc-sum-benchmark' +[Failed] Build of 'dl-op-linalg-batch-matmul-benchmark' +[Success] Build of 'dl-op-linalg-arithaddf-benchmark' +[Success] Build of 'dl-op-linalg-arithdivf-benchmark' +[Success] Build of 'dl-op-linalg-arithmulf-benchmark' +[Success] Build of 'dl-op-linalg-arithnegf-benchmark' +[Success] Build of 'dl-op-linalg-arithsubf-benchmark' +[Success] Build of 'dl-op-linalg-mathfpow-benchmark' +[Success] Build of 'dl-op-linalg-mathrsqrt-benchmark' +[Success] Build of 'dl-op-linalg-mathexp-benchmark' +[Success] Build of 'dl-op-linalg-reduceaddf-benchmark' +[Success] Build of 'dl-op-linalg-reducemaxf-benchmark' +[Success] Build of 'dl-op-linalg-softmax-exp-sum-div-benchmark' +[Failed] Build of 'dl-op-tosa-transpose-benchmark' +[Failed] Build of 'dl-op-matmul-transpose-b-benchmark' +[Failed] Build of 'dl-model-whisper-benchmark' +[Failed] Build of 'dl-model-resnet18-benchmark' +[Success] Build of 'dl-layer-ffn-benchmark' +[Success] Build of 'dl-layer-selfattention-benchmark' +[Success] Build of 'dl-layer-rmsnorm-benchmark' +[Failed] Build of 'dl-op-linalg-matmul-benchmark' +[Success] Build of 'dl-op-linalg-conv2d-nchw-fchw-benchmark' +[Success] Build of 'dl-op-linalg-conv2d-nhwc-hwcf-benchmark' +[Success] Build of 'dl-op-linalg-conv2d-nhwc-fhwc-benchmark' +[Success] Build of 'dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark' +[Success] Build of 'dl-op-linalg-pooling-nhwc-sum-benchmark' +[Failed] Build of 'dl-op-linalg-batch-matmul-benchmark' +[Success] Build of 'dl-op-linalg-arithaddf-benchmark' +[Success] Build of 'dl-op-linalg-arithdivf-benchmark' +[Success] Build of 'dl-op-linalg-arithmulf-benchmark' +[Success] Build of 'dl-op-linalg-arithnegf-benchmark' +[Success] Build of 'dl-op-linalg-arithsubf-benchmark' +[Success] Build of 'dl-op-linalg-mathfpow-benchmark' +[Success] Build of 'dl-op-linalg-mathrsqrt-benchmark' +[Success] Build of 'dl-op-linalg-mathexp-benchmark' +[Success] Build of 'dl-op-linalg-reduceaddf-benchmark' +[Success] Build of 'dl-op-linalg-reducemaxf-benchmark' +[Success] Build of 'dl-op-linalg-softmax-exp-sum-div-benchmark' +[Failed] Build of 'dl-op-tosa-transpose-benchmark' +[Failed] Build of 'dl-op-matmul-transpose-b-benchmark' diff --git a/test_result/deeplearning/build_results_summary.log b/test_result/deeplearning/build_results_summary.log new file mode 100644 index 00000000..0f7a7c2e --- /dev/null +++ b/test_result/deeplearning/build_results_summary.log @@ -0,0 +1,57 @@ +[Failed] Build of 'dl-model-mobilenetv3-benchmark' +[Failed] Build of 'dl-model-lenet-benchmark' +[Failed] Build of 'dl-model-bert-benchmark' +[Failed] Build of 'dl-model-whisper-benchmark' +[Failed] Build of 'dl-model-resnet18-benchmark' +[Failed] Build of 'dl-layer-ffn-benchmark' +[Failed] Build of 'dl-layer-selfattention-benchmark' +[Failed] Build of 'dl-layer-rmsnorm-benchmark' +[Failed] Build of 'dl-op-linalg-matmul-benchmark' +[Failed] Build of 'dl-op-linalg-conv2d-nchw-fchw-benchmark' +[Failed] Build of 'dl-op-linalg-conv2d-nhwc-hwcf-benchmark' +[Failed] Build of 'dl-op-linalg-conv2d-nhwc-fhwc-benchmark' +[Failed] Build of 'dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark' +[Failed] Build of 'dl-op-linalg-pooling-nhwc-sum-benchmark' +[Failed] Build of 'dl-op-linalg-batch-matmul-benchmark' +[Failed] Build of 'dl-op-linalg-arithaddf-benchmark' +[Failed] Build of 'dl-op-linalg-arithdivf-benchmark' +[Failed] Build of 'dl-op-linalg-arithmulf-benchmark' +[Failed] Build of 'dl-op-linalg-arithnegf-benchmark' +[Failed] Build of 'dl-op-linalg-arithsubf-benchmark' +[Failed] Build of 'dl-op-linalg-mathfpow-benchmark' +[Failed] Build of 'dl-op-linalg-mathrsqrt-benchmark' +[Failed] Build of 'dl-op-linalg-mathexp-benchmark' +[Failed] Build of 'dl-op-linalg-reduceaddf-benchmark' +[Failed] Build of 'dl-op-linalg-reducemaxf-benchmark' +[Failed] Build of 'dl-op-linalg-softmax-exp-sum-div-benchmark' +[Failed] Build of 'dl-op-tosa-transpose-benchmark' +[Failed] Build of 'dl-op-matmul-transpose-b-benchmark' +[Failed] Build of 'dl-model-tinyllama-benchmark' +[Failed] Build of 'dl-model-mobilenetv3-benchmark' +[Success] Build of 'dl-model-lenet-benchmark' +[Failed] Build of 'dl-model-bert-benchmark' +[Failed] Build of 'dl-model-whisper-benchmark' +[Failed] Build of 'dl-model-resnet18-benchmark' +[Success] Build of 'dl-layer-ffn-benchmark' +[Success] Build of 'dl-layer-selfattention-benchmark' +[Success] Build of 'dl-layer-rmsnorm-benchmark' +[Failed] Build of 'dl-op-linalg-matmul-benchmark' +[Success] Build of 'dl-op-linalg-conv2d-nchw-fchw-benchmark' +[Success] Build of 'dl-op-linalg-conv2d-nhwc-hwcf-benchmark' +[Success] Build of 'dl-op-linalg-conv2d-nhwc-fhwc-benchmark' +[Success] Build of 'dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark' +[Success] Build of 'dl-op-linalg-pooling-nhwc-sum-benchmark' +[Failed] Build of 'dl-op-linalg-batch-matmul-benchmark' +[Success] Build of 'dl-op-linalg-arithaddf-benchmark' +[Success] Build of 'dl-op-linalg-arithdivf-benchmark' +[Success] Build of 'dl-op-linalg-arithmulf-benchmark' +[Success] Build of 'dl-op-linalg-arithnegf-benchmark' +[Success] Build of 'dl-op-linalg-arithsubf-benchmark' +[Success] Build of 'dl-op-linalg-mathfpow-benchmark' +[Success] Build of 'dl-op-linalg-mathrsqrt-benchmark' +[Success] Build of 'dl-op-linalg-mathexp-benchmark' +[Success] Build of 'dl-op-linalg-reduceaddf-benchmark' +[Success] Build of 'dl-op-linalg-reducemaxf-benchmark' +[Success] Build of 'dl-op-linalg-softmax-exp-sum-div-benchmark' +[Failed] Build of 'dl-op-tosa-transpose-benchmark' +[Failed] Build of 'dl-op-matmul-transpose-b-benchmark' diff --git a/test_result/deeplearning/dl-layer-ffn-benchmark.json b/test_result/deeplearning/dl-layer-ffn-benchmark.json new file mode 100644 index 00000000..fdea2004 --- /dev/null +++ b/test_result/deeplearning/dl-layer-ffn-benchmark.json @@ -0,0 +1,68 @@ +{ + "context": { + "date": "2025-09-07T12:45:30+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./dl-layer-ffn-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [2.51807,3.40967,5.1626], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "DL_LAYER_FFN/Scalar", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "DL_LAYER_FFN/Scalar", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 10218, + "real_time": 6.7533425334895703e-02, + "cpu_time": 6.7531935701702864e-02, + "time_unit": "ms" + }, + { + "name": "DL_LAYER_FFN/Auto_Vectorization", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "DL_LAYER_FFN/Auto_Vectorization", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 26193, + "real_time": 2.6626899614870417e-02, + "cpu_time": 2.6626213683045089e-02, + "time_unit": "ms" + } + ] +} diff --git a/test_result/deeplearning/dl-layer-ffn-benchmark.log b/test_result/deeplearning/dl-layer-ffn-benchmark.log new file mode 100644 index 00000000..e69de29b diff --git a/test_result/deeplearning/dl-layer-rmsnorm-benchmark.json b/test_result/deeplearning/dl-layer-rmsnorm-benchmark.json new file mode 100644 index 00000000..e7f27984 --- /dev/null +++ b/test_result/deeplearning/dl-layer-rmsnorm-benchmark.json @@ -0,0 +1,68 @@ +{ + "context": { + "date": "2025-09-07T12:45:34+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./dl-layer-rmsnorm-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [2.47656,3.38623,5.14551], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "DL_LAYER_RMSNORM/Scalar", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "DL_LAYER_RMSNORM/Scalar", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 339474, + "real_time": 1.9830409605425532e-03, + "cpu_time": 1.9829382397473739e-03, + "time_unit": "ms" + }, + { + "name": "DL_LAYER_RMSNORM/Auto_Vectorization", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "DL_LAYER_RMSNORM/Auto_Vectorization", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 780156, + "real_time": 8.9165813354251345e-04, + "cpu_time": 8.9162349196827311e-04, + "time_unit": "ms" + } + ] +} diff --git a/test_result/deeplearning/dl-layer-rmsnorm-benchmark.log b/test_result/deeplearning/dl-layer-rmsnorm-benchmark.log new file mode 100644 index 00000000..e69de29b diff --git a/test_result/deeplearning/dl-layer-selfattention-benchmark.json b/test_result/deeplearning/dl-layer-selfattention-benchmark.json new file mode 100644 index 00000000..f66a0d7e --- /dev/null +++ b/test_result/deeplearning/dl-layer-selfattention-benchmark.json @@ -0,0 +1,68 @@ +{ + "context": { + "date": "2025-09-07T12:45:32+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./dl-layer-selfattention-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [2.51807,3.40967,5.1626], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "DL_LAYER_ATTENTION/Scalar", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "DL_LAYER_ATTENTION/Scalar", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 144, + "real_time": 4.8677878868248730e+00, + "cpu_time": 4.8676234444444439e+00, + "time_unit": "ms" + }, + { + "name": "DL_LAYER_ATTENTION/Auto_Vectorization", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "DL_LAYER_ATTENTION/Auto_Vectorization", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 435, + "real_time": 1.5936243722493622e+00, + "cpu_time": 1.5935723448275860e+00, + "time_unit": "ms" + } + ] +} diff --git a/test_result/deeplearning/dl-layer-selfattention-benchmark.log b/test_result/deeplearning/dl-layer-selfattention-benchmark.log new file mode 100644 index 00000000..e69de29b diff --git a/test_result/deeplearning/dl-model-lenet-benchmark.json b/test_result/deeplearning/dl-model-lenet-benchmark.json new file mode 100644 index 00000000..f50ed8e9 --- /dev/null +++ b/test_result/deeplearning/dl-model-lenet-benchmark.json @@ -0,0 +1,68 @@ +{ + "context": { + "date": "2025-09-07T12:41:48+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./dl-model-lenet-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [3.67334,4.12793,5.80713], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "DL_MODEL_LENET/Auto_Vectorization", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "DL_MODEL_LENET/Auto_Vectorization", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4111, + "real_time": 1.7333792885473193e-01, + "cpu_time": 1.7333462247628315e-01, + "time_unit": "ms" + }, + { + "name": "DL_MODEL_LENET/Buddy_Vectorization", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "DL_MODEL_LENET/Buddy_Vectorization", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4846, + "real_time": 1.4355380335623599e-01, + "cpu_time": 1.4355146595130003e-01, + "time_unit": "ms" + } + ] +} diff --git a/test_result/deeplearning/dl-model-lenet-benchmark.log b/test_result/deeplearning/dl-model-lenet-benchmark.log new file mode 100644 index 00000000..e69de29b diff --git a/test_result/deeplearning/dl-model-mobilenetv3-benchmark.json b/test_result/deeplearning/dl-model-mobilenetv3-benchmark.json new file mode 100644 index 00000000..dd135dd9 --- /dev/null +++ b/test_result/deeplearning/dl-model-mobilenetv3-benchmark.json @@ -0,0 +1,68 @@ +{ + "context": { + "date": "2025-09-07T12:41:45+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./dl-model-mobilenetv3-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [3.67334,4.12793,5.80713], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "BM_MobileNet_V3/BM_MobileNet_V3_scalar", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "BM_MobileNet_V3/BM_MobileNet_V3_scalar", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 17, + "real_time": 3.9183362222769681e+01, + "cpu_time": 3.9182252941176472e+01, + "time_unit": "ms" + }, + { + "name": "BM_MobileNet_V3/BM_MobileNet_V3_conv_opt", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "BM_MobileNet_V3/BM_MobileNet_V3_conv_opt", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 20, + "real_time": 3.4668323397636414e+01, + "cpu_time": 3.4667267849999995e+01, + "time_unit": "ms" + } + ] +} diff --git a/test_result/deeplearning/dl-model-mobilenetv3-benchmark.log b/test_result/deeplearning/dl-model-mobilenetv3-benchmark.log new file mode 100644 index 00000000..9a53be36 --- /dev/null +++ b/test_result/deeplearning/dl-model-mobilenetv3-benchmark.log @@ -0,0 +1,19 @@ +2025-09-07T12:41:45+00:00 +Running ./dl-model-mobilenetv3-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 3.67, 4.13, 5.81 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +----------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +----------------------------------------------------------------------------------- +BM_MobileNet_V3/BM_MobileNet_V3_scalar 39.2 ms 39.2 ms 17 +BM_MobileNet_V3/BM_MobileNet_V3_conv_opt 34.7 ms 34.7 ms 20 +----------------------------------------------------------- +Correctness Verification: +Transform case: PASS +----------------------------------------------------------- diff --git a/test_result/deeplearning/dl-model-resnet18-benchmark.json b/test_result/deeplearning/dl-model-resnet18-benchmark.json new file mode 100644 index 00000000..1628c1ef --- /dev/null +++ b/test_result/deeplearning/dl-model-resnet18-benchmark.json @@ -0,0 +1,68 @@ +{ + "context": { + "date": "2025-09-07T12:45:27+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./dl-model-resnet18-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [2.47607,3.41699,5.17432], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "DL_MODEL_Resnet18/Auto_Vectorization", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "DL_MODEL_Resnet18/Auto_Vectorization", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 7.6702358201146126e+02, + "cpu_time": 7.6673241800000005e+02, + "time_unit": "ms" + }, + { + "name": "DL_MODEL_Resnet18/Buddy_Vectorization", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "DL_MODEL_Resnet18/Buddy_Vectorization", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 7.7053957059979439e+02, + "cpu_time": 7.7040162699999996e+02, + "time_unit": "ms" + } + ] +} diff --git a/test_result/deeplearning/dl-model-resnet18-benchmark.log b/test_result/deeplearning/dl-model-resnet18-benchmark.log new file mode 100644 index 00000000..97e62844 --- /dev/null +++ b/test_result/deeplearning/dl-model-resnet18-benchmark.log @@ -0,0 +1,18 @@ +2025-09-07T12:45:27+00:00 +Running ./dl-model-resnet18-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 2.48, 3.42, 5.17 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------- +DL_MODEL_Resnet18/Auto_Vectorization 767 ms 767 ms 1 +DL_MODEL_Resnet18/Buddy_Vectorization 771 ms 770 ms 1 +----------------------------------------------------------- +Correctness Verification: PASS +----------------------------------------------------------- diff --git a/test_result/deeplearning/dl-model-tinyllama-benchmark.json b/test_result/deeplearning/dl-model-tinyllama-benchmark.json new file mode 100644 index 00000000..3961b66f --- /dev/null +++ b/test_result/deeplearning/dl-model-tinyllama-benchmark.json @@ -0,0 +1,82 @@ +{ + "context": { + "date": "2025-09-07T12:35:22+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./dl-model-tinyllama-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [4.896,5.53271,6.99316], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "DL_MODEL_TINYLLAMA/scalar", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "DL_MODEL_TINYLLAMA/scalar", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 1.7120061315596104e+05, + "cpu_time": 1.7119792047700004e+05, + "time_unit": "ms" + }, + { + "name": "DL_MODEL_TINYLLAMA/matmul_opt", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "DL_MODEL_TINYLLAMA/matmul_opt", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 1.1143549453467131e+04, + "cpu_time": 1.1135273949000009e+04, + "time_unit": "ms" + }, + { + "name": "DL_MODEL_TINYLLAMA/matmul_opt_omp", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "DL_MODEL_TINYLLAMA/matmul_opt_omp", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 8.3347530625760555e+03, + "cpu_time": 7.7325455960000227e+03, + "time_unit": "ms" + } + ] +} diff --git a/test_result/deeplearning/dl-model-tinyllama-benchmark.log b/test_result/deeplearning/dl-model-tinyllama-benchmark.log new file mode 100644 index 00000000..b6f53ed8 --- /dev/null +++ b/test_result/deeplearning/dl-model-tinyllama-benchmark.log @@ -0,0 +1,19 @@ +2025-09-07T12:35:22+00:00 +Running ./dl-model-tinyllama-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 4.90, 5.53, 6.99 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +---------------------------------------------------------------------------- +Benchmark Time CPU Iterations +---------------------------------------------------------------------------- +DL_MODEL_TINYLLAMA/scalar 171201 ms 171198 ms 1 +DL_MODEL_TINYLLAMA/matmul_opt 11144 ms 11135 ms 1 +DL_MODEL_TINYLLAMA/matmul_opt_omp 8335 ms 7733 ms 1 +---------- Verification ---------- +matmul_opt PASS +matmul_opt_omp PASS diff --git a/test_result/deeplearning/dl-model-whisper-benchmark.json b/test_result/deeplearning/dl-model-whisper-benchmark.json new file mode 100644 index 00000000..add9864c --- /dev/null +++ b/test_result/deeplearning/dl-model-whisper-benchmark.json @@ -0,0 +1,68 @@ +{ + "context": { + "date": "2025-09-07T12:41:50+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./dl-model-whisper-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [3.69971,4.12549,5.79736], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "DL_MODEL_Whisper/Auto_Vectorization", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "DL_MODEL_Whisper/Auto_Vectorization", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 8.8294716205447912e+04, + "cpu_time": 8.8293256732999987e+04, + "time_unit": "ms" + }, + { + "name": "DL_MODEL_Whisper/Buddy_Vectorization", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "DL_MODEL_Whisper/Buddy_Vectorization", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 4.0465919472277164e+04, + "cpu_time": 4.0458067526999999e+04, + "time_unit": "ms" + } + ] +} diff --git a/test_result/deeplearning/dl-model-whisper-benchmark.log b/test_result/deeplearning/dl-model-whisper-benchmark.log new file mode 100644 index 00000000..8cfcecec --- /dev/null +++ b/test_result/deeplearning/dl-model-whisper-benchmark.log @@ -0,0 +1,15 @@ +2025-09-07T12:41:50+00:00 +Running ./dl-model-whisper-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 3.70, 4.13, 5.80 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +------------------------------------------------------------------------------- +DL_MODEL_Whisper/Auto_Vectorization 88295 ms 88293 ms 1 +DL_MODEL_Whisper/Buddy_Vectorization 40466 ms 40458 ms 1 diff --git a/test_result/deeplearning/dl-op-linalg-arithaddf-benchmark.json b/test_result/deeplearning/dl-op-linalg-arithaddf-benchmark.json new file mode 100644 index 00000000..bc957097 --- /dev/null +++ b/test_result/deeplearning/dl-op-linalg-arithaddf-benchmark.json @@ -0,0 +1,68 @@ +{ + "context": { + "date": "2025-09-07T12:46:04+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./dl-op-linalg-arithaddf-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [2.34521,3.26758,5.04932], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "BM_ADDF_SCALAR", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "BM_ADDF_SCALAR", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 22527, + "real_time": 3.1060902958688446e-02, + "cpu_time": 3.1059653438096500e-02, + "time_unit": "ms" + }, + { + "name": "BM_ADDF_AutoVectorization", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "BM_ADDF_AutoVectorization", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 169988, + "real_time": 4.8817289258969946e-03, + "cpu_time": 4.8816731710473685e-03, + "time_unit": "ms" + } + ] +} diff --git a/test_result/deeplearning/dl-op-linalg-arithaddf-benchmark.log b/test_result/deeplearning/dl-op-linalg-arithaddf-benchmark.log new file mode 100644 index 00000000..e69de29b diff --git a/test_result/deeplearning/dl-op-linalg-arithdivf-benchmark.json b/test_result/deeplearning/dl-op-linalg-arithdivf-benchmark.json new file mode 100644 index 00000000..629be93f --- /dev/null +++ b/test_result/deeplearning/dl-op-linalg-arithdivf-benchmark.json @@ -0,0 +1,68 @@ +{ + "context": { + "date": "2025-09-07T12:46:07+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./dl-op-linalg-arithdivf-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [2.34521,3.26758,5.04932], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "BM_DIVF_SCALAR", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "BM_DIVF_SCALAR", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 22003, + "real_time": 3.2068282908939941e-02, + "cpu_time": 3.2067214334408942e-02, + "time_unit": "ms" + }, + { + "name": "BM_DIVF_AutoVectorization", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "BM_DIVF_AutoVectorization", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 69823, + "real_time": 1.0602428337310130e-02, + "cpu_time": 1.0602179811809862e-02, + "time_unit": "ms" + } + ] +} diff --git a/test_result/deeplearning/dl-op-linalg-arithdivf-benchmark.log b/test_result/deeplearning/dl-op-linalg-arithdivf-benchmark.log new file mode 100644 index 00000000..e69de29b diff --git a/test_result/deeplearning/dl-op-linalg-arithmulf-benchmark.json b/test_result/deeplearning/dl-op-linalg-arithmulf-benchmark.json new file mode 100644 index 00000000..a7052857 --- /dev/null +++ b/test_result/deeplearning/dl-op-linalg-arithmulf-benchmark.json @@ -0,0 +1,68 @@ +{ + "context": { + "date": "2025-09-07T12:46:09+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./dl-op-linalg-arithmulf-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [2.39795,3.2627,5.03809], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "BM_MULF_SCALAR", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "BM_MULF_SCALAR", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 22824, + "real_time": 3.0709744117373212e-02, + "cpu_time": 3.0708664607430772e-02, + "time_unit": "ms" + }, + { + "name": "BM_MULF_AutoVectorization", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "BM_MULF_AutoVectorization", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 169993, + "real_time": 4.1166770421966290e-03, + "cpu_time": 4.1165691234344949e-03, + "time_unit": "ms" + } + ] +} diff --git a/test_result/deeplearning/dl-op-linalg-arithmulf-benchmark.log b/test_result/deeplearning/dl-op-linalg-arithmulf-benchmark.log new file mode 100644 index 00000000..e69de29b diff --git a/test_result/deeplearning/dl-op-linalg-arithnegf-benchmark.json b/test_result/deeplearning/dl-op-linalg-arithnegf-benchmark.json new file mode 100644 index 00000000..282e1318 --- /dev/null +++ b/test_result/deeplearning/dl-op-linalg-arithnegf-benchmark.json @@ -0,0 +1,68 @@ +{ + "context": { + "date": "2025-09-07T12:46:11+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./dl-op-linalg-arithnegf-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [2.39795,3.2627,5.03809], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "BM_NEGF_SCALAR", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "BM_NEGF_SCALAR", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 29588, + "real_time": 2.3588028378715157e-02, + "cpu_time": 2.3587252737596327e-02, + "time_unit": "ms" + }, + { + "name": "BM_NEGF_AutoVectorization", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "BM_NEGF_AutoVectorization", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 237464, + "real_time": 2.9502898712950253e-03, + "cpu_time": 2.9501475507866456e-03, + "time_unit": "ms" + } + ] +} diff --git a/test_result/deeplearning/dl-op-linalg-arithnegf-benchmark.log b/test_result/deeplearning/dl-op-linalg-arithnegf-benchmark.log new file mode 100644 index 00000000..e69de29b diff --git a/test_result/deeplearning/dl-op-linalg-arithsubf-benchmark.json b/test_result/deeplearning/dl-op-linalg-arithsubf-benchmark.json new file mode 100644 index 00000000..4660fe60 --- /dev/null +++ b/test_result/deeplearning/dl-op-linalg-arithsubf-benchmark.json @@ -0,0 +1,68 @@ +{ + "context": { + "date": "2025-09-07T12:46:13+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./dl-op-linalg-arithsubf-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [2.39795,3.2627,5.03809], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "BM_SUBF_SCALAR", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "BM_SUBF_SCALAR", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 22687, + "real_time": 3.0744381588195889e-02, + "cpu_time": 3.0743076916295679e-02, + "time_unit": "ms" + }, + { + "name": "BM_SUBF_AutoVectorization", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "BM_SUBF_AutoVectorization", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 170328, + "real_time": 4.1076257294038214e-03, + "cpu_time": 4.1075212472406180e-03, + "time_unit": "ms" + } + ] +} diff --git a/test_result/deeplearning/dl-op-linalg-arithsubf-benchmark.log b/test_result/deeplearning/dl-op-linalg-arithsubf-benchmark.log new file mode 100644 index 00000000..e69de29b diff --git a/test_result/deeplearning/dl-op-linalg-batch-matmul-benchmark.json b/test_result/deeplearning/dl-op-linalg-batch-matmul-benchmark.json new file mode 100644 index 00000000..a25979f5 --- /dev/null +++ b/test_result/deeplearning/dl-op-linalg-batch-matmul-benchmark.json @@ -0,0 +1,138 @@ +{ + "context": { + "date": "2025-09-07T12:45:54+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./dl-op-linalg-batch-matmul-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [2.40869,3.31104,5.08252], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "DL_OPS_BATCH_MATMUL/Scalar/iterations:1", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "DL_OPS_BATCH_MATMUL/Scalar/iterations:1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 3.6347736530005932e+03, + "cpu_time": 3.6346553479999998e+03, + "time_unit": "ms" + }, + { + "name": "DL_OPS_BATCH_MATMUL/AutoVectorization/iterations:1", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "DL_OPS_BATCH_MATMUL/AutoVectorization/iterations:1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 1.0061066299676895e+03, + "cpu_time": 1.0060745660000001e+03, + "time_unit": "ms" + }, + { + "name": "DL_OPS_BATCH_MATMUL/Vectorization/iterations:1", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "DL_OPS_BATCH_MATMUL/Vectorization/iterations:1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 1.9591017067432404e+02, + "cpu_time": 1.9590338600000035e+02, + "time_unit": "ms" + }, + { + "name": "DL_OPS_BATCH_MATMUL/Tile/iterations:1", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "DL_OPS_BATCH_MATMUL/Tile/iterations:1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 1.1179352924227715e+02, + "cpu_time": 1.1179250600000046e+02, + "time_unit": "ms" + }, + { + "name": "DL_OPS_BATCH_MATMUL/SCF/iterations:1", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "DL_OPS_BATCH_MATMUL/SCF/iterations:1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 1.2078800052404404e+02, + "cpu_time": 1.2078363899999988e+02, + "time_unit": "ms" + }, + { + "name": "DL_OPS_BATCH_MATMUL/BROADCAST/iterations:1", + "family_index": 5, + "per_family_instance_index": 0, + "run_name": "DL_OPS_BATCH_MATMUL/BROADCAST/iterations:1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 3.6683125793933868e+02, + "cpu_time": 3.6682773099999986e+02, + "time_unit": "ms" + }, + { + "name": "DL_OPS_BATCH_MATMUL/BROADCAST_OMP/iterations:1", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "DL_OPS_BATCH_MATMUL/BROADCAST_OMP/iterations:1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 1.1074854433536530e+02, + "cpu_time": 2.2687625000000544e+01, + "time_unit": "ms" + } + ] +} diff --git a/test_result/deeplearning/dl-op-linalg-batch-matmul-benchmark.log b/test_result/deeplearning/dl-op-linalg-batch-matmul-benchmark.log new file mode 100644 index 00000000..8d059c82 --- /dev/null +++ b/test_result/deeplearning/dl-op-linalg-batch-matmul-benchmark.log @@ -0,0 +1,25 @@ +2025-09-07T12:45:54+00:00 +Running ./dl-op-linalg-batch-matmul-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 2.41, 3.31, 5.08 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +--------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +--------------------------------------------------------------------------------------------- +DL_OPS_BATCH_MATMUL/Scalar/iterations:1 3635 ms 3635 ms 1 +DL_OPS_BATCH_MATMUL/AutoVectorization/iterations:1 1006 ms 1006 ms 1 +DL_OPS_BATCH_MATMUL/Vectorization/iterations:1 196 ms 196 ms 1 +DL_OPS_BATCH_MATMUL/Tile/iterations:1 112 ms 112 ms 1 +DL_OPS_BATCH_MATMUL/SCF/iterations:1 121 ms 121 ms 1 +DL_OPS_BATCH_MATMUL/BROADCAST/iterations:1 367 ms 367 ms 1 +DL_OPS_BATCH_MATMUL/BROADCAST_OMP/iterations:1 111 ms 22.7 ms 1 +---------- Verification ---------- +Tile PASS +SCF PASS +BROADCAST PASS +BROADCAST_OMP PASS diff --git a/test_result/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.json b/test_result/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.json new file mode 100644 index 00000000..3a979642 --- /dev/null +++ b/test_result/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.json @@ -0,0 +1,68 @@ +{ + "context": { + "date": "2025-09-07T12:45:47+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./dl-op-linalg-conv2d-nchw-fchw-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [2.4834,3.35645,5.1167], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "BM_Conv2DNchwFchw_SCALAR", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "BM_Conv2DNchwFchw_SCALAR", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2, + "real_time": 2.9066542163491249e+02, + "cpu_time": 2.9065969050000001e+02, + "time_unit": "ms" + }, + { + "name": "BM_Conv2DNchwFchw_Im2col", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "BM_Conv2DNchwFchw_Im2col", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 72, + "real_time": 8.5637474743028488e+00, + "cpu_time": 8.5636718611111107e+00, + "time_unit": "ms" + } + ] +} diff --git a/test_result/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.log b/test_result/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.log new file mode 100644 index 00000000..e69de29b diff --git a/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json b/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json new file mode 100644 index 00000000..595bcecd --- /dev/null +++ b/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json @@ -0,0 +1,96 @@ +{ + "context": { + "date": "2025-09-07T12:45:51+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./dl-op-linalg-conv2d-nhwc-fhwc-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [2.44434,3.3335,5.09961], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "DL_OPS_CONV_2D_NHWC_FHWC/scalar/iterations:5", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "DL_OPS_CONV_2D_NHWC_FHWC/scalar/iterations:5", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 5, + "real_time": 7.3888380080461502e+01, + "cpu_time": 7.3885279400000002e+01, + "time_unit": "ms" + }, + { + "name": "DL_OPS_CONV_2D_NHWC_FHWC/auto_vectorization/iterations:5", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "DL_OPS_CONV_2D_NHWC_FHWC/auto_vectorization/iterations:5", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 5, + "real_time": 9.7335599362850189e+00, + "cpu_time": 9.7335111999999988e+00, + "time_unit": "ms" + }, + { + "name": "DL_OPS_CONV_2D_NHWC_FHWC/vectorization/iterations:5", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "DL_OPS_CONV_2D_NHWC_FHWC/vectorization/iterations:5", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 5, + "real_time": 1.8217429518699646e+00, + "cpu_time": 1.8217338000000027e+00, + "time_unit": "ms" + }, + { + "name": "DL_OPS_CONV_2D_NHWC_FHWC/vec_tile/iterations:5", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "DL_OPS_CONV_2D_NHWC_FHWC/vec_tile/iterations:5", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 5, + "real_time": 1.7791815102100372e+00, + "cpu_time": 1.7791528000000056e+00, + "time_unit": "ms" + } + ] +} diff --git a/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.log b/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.log new file mode 100644 index 00000000..e69de29b diff --git a/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json b/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json new file mode 100644 index 00000000..17a679c1 --- /dev/null +++ b/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json @@ -0,0 +1,68 @@ +{ + "context": { + "date": "2025-09-07T12:45:49+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./dl-op-linalg-conv2d-nhwc-hwcf-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [2.44434,3.3335,5.09961], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "BM_CONV_2D_NHWC_HWCF_SCALAR", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "BM_CONV_2D_NHWC_HWCF_SCALAR", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 21, + "real_time": 3.3404812571548277e+01, + "cpu_time": 3.3404357952380956e+01, + "time_unit": "ms" + }, + { + "name": "BM_CONV_2D_NHWC_HWCF_AutoVectorization", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "BM_CONV_2D_NHWC_HWCF_AutoVectorization", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 110, + "real_time": 6.2886948273940524e+00, + "cpu_time": 6.2886236181818180e+00, + "time_unit": "ms" + } + ] +} diff --git a/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.log b/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.log new file mode 100644 index 00000000..e69de29b diff --git a/test_result/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json b/test_result/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json new file mode 100644 index 00000000..b1b91623 --- /dev/null +++ b/test_result/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json @@ -0,0 +1,82 @@ +{ + "context": { + "date": "2025-09-07T12:45:52+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [2.44434,3.3335,5.09961], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/scalar/iterations:5", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/scalar/iterations:5", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 5, + "real_time": 4.3137572705745697e+00, + "cpu_time": 4.3121678000000001e+00, + "time_unit": "ms" + }, + { + "name": "DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/auto_vectorization/iterations:5", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/auto_vectorization/iterations:5", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 5, + "real_time": 1.7169959843158722e+00, + "cpu_time": 1.7169760000000007e+00, + "time_unit": "ms" + }, + { + "name": "DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/vectorization/iterations:5", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/vectorization/iterations:5", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 5, + "real_time": 1.2791678309440613e-01, + "cpu_time": 1.2791580000000025e-01, + "time_unit": "ms" + } + ] +} diff --git a/test_result/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.log b/test_result/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.log new file mode 100644 index 00000000..5e616453 --- /dev/null +++ b/test_result/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.log @@ -0,0 +1 @@ +qemu-riscv64-static: Could not open '/lib/ld-linux-riscv64-lp64d.so.1': No such file or directory diff --git a/test_result/deeplearning/dl-op-linalg-mathexp-benchmark.json b/test_result/deeplearning/dl-op-linalg-mathexp-benchmark.json new file mode 100644 index 00000000..1cc5b9de --- /dev/null +++ b/test_result/deeplearning/dl-op-linalg-mathexp-benchmark.json @@ -0,0 +1,68 @@ +{ + "context": { + "date": "2025-09-07T12:46:19+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./dl-op-linalg-mathexp-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [2.49072,3.25391,5.01562], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "BM_EXP_SCALAR", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "BM_EXP_SCALAR", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 14801, + "real_time": 4.7153966502236092e-02, + "cpu_time": 4.7153170799270318e-02, + "time_unit": "ms" + }, + { + "name": "BM_EXP_AutoVectorization", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "BM_EXP_AutoVectorization", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 21304, + "real_time": 3.2612131513344626e-02, + "cpu_time": 3.2610319517461503e-02, + "time_unit": "ms" + } + ] +} diff --git a/test_result/deeplearning/dl-op-linalg-mathexp-benchmark.log b/test_result/deeplearning/dl-op-linalg-mathexp-benchmark.log new file mode 100644 index 00000000..e69de29b diff --git a/test_result/deeplearning/dl-op-linalg-mathfpow-benchmark.json b/test_result/deeplearning/dl-op-linalg-mathfpow-benchmark.json new file mode 100644 index 00000000..0b5c9ab3 --- /dev/null +++ b/test_result/deeplearning/dl-op-linalg-mathfpow-benchmark.json @@ -0,0 +1,68 @@ +{ + "context": { + "date": "2025-09-07T12:46:15+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./dl-op-linalg-mathfpow-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [2.44629,3.2583,5.02686], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "BM_FPOW_SCALAR", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "BM_FPOW_SCALAR", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 8174, + "real_time": 8.5793241880358306e-02, + "cpu_time": 8.5789674944947408e-02, + "time_unit": "ms" + }, + { + "name": "BM_FPOW_AutoVectorization", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "BM_FPOW_AutoVectorization", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 11919, + "real_time": 5.8559470965724454e-02, + "cpu_time": 5.8556822887826147e-02, + "time_unit": "ms" + } + ] +} diff --git a/test_result/deeplearning/dl-op-linalg-mathfpow-benchmark.log b/test_result/deeplearning/dl-op-linalg-mathfpow-benchmark.log new file mode 100644 index 00000000..e69de29b diff --git a/test_result/deeplearning/dl-op-linalg-mathrsqrt-benchmark.json b/test_result/deeplearning/dl-op-linalg-mathrsqrt-benchmark.json new file mode 100644 index 00000000..e8085b93 --- /dev/null +++ b/test_result/deeplearning/dl-op-linalg-mathrsqrt-benchmark.json @@ -0,0 +1,68 @@ +{ + "context": { + "date": "2025-09-07T12:46:17+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./dl-op-linalg-mathrsqrt-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [2.44629,3.2583,5.02686], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "BM_RSQRT_SCALAR", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "BM_RSQRT_SCALAR", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 9351, + "real_time": 7.4849401154169840e-02, + "cpu_time": 7.4846361458667521e-02, + "time_unit": "ms" + }, + { + "name": "BM_RSQRT_AutoVectorization", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "BM_RSQRT_AutoVectorization", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 155807, + "real_time": 4.4754421888014168e-03, + "cpu_time": 4.4753065651735808e-03, + "time_unit": "ms" + } + ] +} diff --git a/test_result/deeplearning/dl-op-linalg-mathrsqrt-benchmark.log b/test_result/deeplearning/dl-op-linalg-mathrsqrt-benchmark.log new file mode 100644 index 00000000..e69de29b diff --git a/test_result/deeplearning/dl-op-linalg-matmul-benchmark.json b/test_result/deeplearning/dl-op-linalg-matmul-benchmark.json new file mode 100644 index 00000000..2cc316a3 --- /dev/null +++ b/test_result/deeplearning/dl-op-linalg-matmul-benchmark.json @@ -0,0 +1,110 @@ +{ + "context": { + "date": "2025-09-07T12:45:36+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./dl-op-linalg-matmul-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [2.47656,3.38623,5.14551], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "DL_OPS_MATMUL/scalar_O0/iterations:1", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "DL_OPS_MATMUL/scalar_O0/iterations:1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 4.0999811291694641e+03, + "cpu_time": 4.0998556719999997e+03, + "time_unit": "ms" + }, + { + "name": "DL_OPS_MATMUL/scalar_O3/iterations:1", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "DL_OPS_MATMUL/scalar_O3/iterations:1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 3.5827754400670528e+03, + "cpu_time": 3.5826578559999998e+03, + "time_unit": "ms" + }, + { + "name": "DL_OPS_MATMUL/tile/iterations:1", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "DL_OPS_MATMUL/tile/iterations:1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 1.0819802060723305e+02, + "cpu_time": 1.0819740099999997e+02, + "time_unit": "ms" + }, + { + "name": "DL_OPS_MATMUL/vec/iterations:1", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "DL_OPS_MATMUL/vec/iterations:1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 6.1437729746103287e+01, + "cpu_time": 6.1437198000000137e+01, + "time_unit": "ms" + }, + { + "name": "DL_OPS_MATMUL/vec_omp/iterations:1", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "DL_OPS_MATMUL/vec_omp/iterations:1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 1.8467400223016739e+01, + "cpu_time": 7.8750589999998510e+00, + "time_unit": "ms" + } + ] +} diff --git a/test_result/deeplearning/dl-op-linalg-matmul-benchmark.log b/test_result/deeplearning/dl-op-linalg-matmul-benchmark.log new file mode 100644 index 00000000..b46496bd --- /dev/null +++ b/test_result/deeplearning/dl-op-linalg-matmul-benchmark.log @@ -0,0 +1,22 @@ +2025-09-07T12:45:36+00:00 +Running ./dl-op-linalg-matmul-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 2.48, 3.39, 5.15 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +------------------------------------------------------------------------------- +DL_OPS_MATMUL/scalar_O0/iterations:1 4100 ms 4100 ms 1 +DL_OPS_MATMUL/scalar_O3/iterations:1 3583 ms 3583 ms 1 +DL_OPS_MATMUL/tile/iterations:1 108 ms 108 ms 1 +DL_OPS_MATMUL/vec/iterations:1 61.4 ms 61.4 ms 1 +DL_OPS_MATMUL/vec_omp/iterations:1 18.5 ms 7.88 ms 1 +---------- Verification ---------- +tile PASS +vec PASS +vec_omp PASS diff --git a/test_result/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.json b/test_result/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.json new file mode 100644 index 00000000..e0b2bec9 --- /dev/null +++ b/test_result/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.json @@ -0,0 +1,68 @@ +{ + "context": { + "date": "2025-09-07T12:45:52+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./dl-op-linalg-pooling-nhwc-sum-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [2.44434,3.3335,5.09961], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "BM_POOLING_NHWC_SUM_SCALAR", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "BM_POOLING_NHWC_SUM_SCALAR", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2922, + "real_time": 2.4033439441913615e-01, + "cpu_time": 2.4032710540725533e-01, + "time_unit": "ms" + }, + { + "name": "BM_POOLING_NHWC_SUM_AutoVectorization", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "BM_POOLING_NHWC_SUM_AutoVectorization", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 16330, + "real_time": 4.2958081279586446e-02, + "cpu_time": 4.2957538089406000e-02, + "time_unit": "ms" + } + ] +} diff --git a/test_result/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.log b/test_result/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.log new file mode 100644 index 00000000..e69de29b diff --git a/test_result/deeplearning/dl-op-linalg-reduceaddf-benchmark.json b/test_result/deeplearning/dl-op-linalg-reduceaddf-benchmark.json new file mode 100644 index 00000000..82f932ac --- /dev/null +++ b/test_result/deeplearning/dl-op-linalg-reduceaddf-benchmark.json @@ -0,0 +1,38 @@ +{ + "context": { + "date": "2025-09-07T12:46:21+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./dl-op-linalg-reduceaddf-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [2.49072,3.25391,5.01562], + "library_build_type": "release" + }, + "benchmarks": [ diff --git a/test_result/deeplearning/dl-op-linalg-reduceaddf-benchmark.log b/test_result/deeplearning/dl-op-linalg-reduceaddf-benchmark.log new file mode 100644 index 00000000..e69de29b diff --git a/test_result/deeplearning/dl-op-linalg-reducemaxf-benchmark.json b/test_result/deeplearning/dl-op-linalg-reducemaxf-benchmark.json new file mode 100644 index 00000000..fdb7b390 --- /dev/null +++ b/test_result/deeplearning/dl-op-linalg-reducemaxf-benchmark.json @@ -0,0 +1,38 @@ +{ + "context": { + "date": "2025-09-07T12:46:21+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./dl-op-linalg-reducemaxf-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [2.49072,3.25391,5.01562], + "library_build_type": "release" + }, + "benchmarks": [ diff --git a/test_result/deeplearning/dl-op-linalg-reducemaxf-benchmark.log b/test_result/deeplearning/dl-op-linalg-reducemaxf-benchmark.log new file mode 100644 index 00000000..e69de29b diff --git a/test_result/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.json b/test_result/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.json new file mode 100644 index 00000000..d57079ca --- /dev/null +++ b/test_result/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.json @@ -0,0 +1,68 @@ +{ + "context": { + "date": "2025-09-07T12:46:21+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./dl-op-linalg-softmax-exp-sum-div-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [2.49072,3.25391,5.01562], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "BM_SOFTMAXEXPSUMDIV_SCALAR", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "BM_SOFTMAXEXPSUMDIV_SCALAR", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 120007, + "real_time": 5.8092399238988792e-03, + "cpu_time": 5.8089997500145821e-03, + "time_unit": "ms" + }, + { + "name": "BM_SOFTMAXEXPSUMDIV_AutoVectorization", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "BM_SOFTMAXEXPSUMDIV_AutoVectorization", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 176914, + "real_time": 3.9636845145346869e-03, + "cpu_time": 3.9634847383474463e-03, + "time_unit": "ms" + } + ] +} diff --git a/test_result/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.log b/test_result/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.log new file mode 100644 index 00000000..e69de29b diff --git a/test_result/deeplearning/dl-op-matmul-transpose-b-benchmark.json b/test_result/deeplearning/dl-op-matmul-transpose-b-benchmark.json new file mode 100644 index 00000000..caa25dfa --- /dev/null +++ b/test_result/deeplearning/dl-op-matmul-transpose-b-benchmark.json @@ -0,0 +1,96 @@ +{ + "context": { + "date": "2025-09-07T12:46:24+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./dl-op-matmul-transpose-b-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [4.61377,3.68164,5.14453], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "DL_OPS_MATMUL_TRANSPOSE_B/scalar_O0/iterations:5", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "DL_OPS_MATMUL_TRANSPOSE_B/scalar_O0/iterations:5", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 5, + "real_time": 1.0958168849349022e+03, + "cpu_time": 1.0942407130000001e+03, + "time_unit": "ms" + }, + { + "name": "DL_OPS_MATMUL_TRANSPOSE_B/scalar_O3/iterations:5", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "DL_OPS_MATMUL_TRANSPOSE_B/scalar_O3/iterations:5", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 5, + "real_time": 2.9605090841650963e+02, + "cpu_time": 2.9603718579999986e+02, + "time_unit": "ms" + }, + { + "name": "DL_OPS_MATMUL_TRANSPOSE_B/scalar_O3_omp/iterations:5", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "DL_OPS_MATMUL_TRANSPOSE_B/scalar_O3_omp/iterations:5", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 5, + "real_time": 3.6250606924295425e+01, + "cpu_time": 2.4062124000000118e+01, + "time_unit": "ms" + }, + { + "name": "DL_OPS_MATMUL_TRANSPOSE_B/vec/iterations:5", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "DL_OPS_MATMUL_TRANSPOSE_B/vec/iterations:5", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 5, + "real_time": 9.5354539155960083e+01, + "cpu_time": 9.5345416999999870e+01, + "time_unit": "ms" + } + ] +} diff --git a/test_result/deeplearning/dl-op-matmul-transpose-b-benchmark.log b/test_result/deeplearning/dl-op-matmul-transpose-b-benchmark.log new file mode 100644 index 00000000..98b32f04 --- /dev/null +++ b/test_result/deeplearning/dl-op-matmul-transpose-b-benchmark.log @@ -0,0 +1,21 @@ +2025-09-07T12:46:24+00:00 +Running ./dl-op-matmul-transpose-b-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 4.61, 3.68, 5.14 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +----------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +----------------------------------------------------------------------------------------------- +DL_OPS_MATMUL_TRANSPOSE_B/scalar_O0/iterations:5 1096 ms 1094 ms 5 +DL_OPS_MATMUL_TRANSPOSE_B/scalar_O3/iterations:5 296 ms 296 ms 5 +DL_OPS_MATMUL_TRANSPOSE_B/scalar_O3_omp/iterations:5 36.3 ms 24.1 ms 5 +DL_OPS_MATMUL_TRANSPOSE_B/vec/iterations:5 95.4 ms 95.3 ms 5 +---------- Verification ---------- +scalar_O3 PASS +scalar_O3_omp PASS +vec PASS diff --git a/test_result/deeplearning/dl-op-tosa-transpose-benchmark.json b/test_result/deeplearning/dl-op-tosa-transpose-benchmark.json new file mode 100644 index 00000000..829e775c --- /dev/null +++ b/test_result/deeplearning/dl-op-tosa-transpose-benchmark.json @@ -0,0 +1,68 @@ +{ + "context": { + "date": "2025-09-07T12:46:23+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./dl-op-tosa-transpose-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [2.49072,3.25391,5.01562], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "DL_OPS_TRANSPOSE_2D/scalar_O0/iterations:5", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "DL_OPS_TRANSPOSE_2D/scalar_O0/iterations:5", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 5, + "real_time": 4.1188374906778336e+01, + "cpu_time": 3.0001973999999997e+01, + "time_unit": "ms" + }, + { + "name": "DL_OPS_TRANSPOSE_2D/scalar_O3/iterations:5", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "DL_OPS_TRANSPOSE_2D/scalar_O3/iterations:5", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 5, + "real_time": 2.9296264052391052e+01, + "cpu_time": 2.4695980400000003e+01, + "time_unit": "ms" + } + ] +} diff --git a/test_result/deeplearning/dl-op-tosa-transpose-benchmark.log b/test_result/deeplearning/dl-op-tosa-transpose-benchmark.log new file mode 100644 index 00000000..4b119245 --- /dev/null +++ b/test_result/deeplearning/dl-op-tosa-transpose-benchmark.log @@ -0,0 +1,17 @@ +2025-09-07T12:46:23+00:00 +Running ./dl-op-tosa-transpose-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 2.49, 3.25, 5.02 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +------------------------------------------------------------------------------------- +DL_OPS_TRANSPOSE_2D/scalar_O0/iterations:5 41.2 ms 30.0 ms 5 +DL_OPS_TRANSPOSE_2D/scalar_O3/iterations:5 29.3 ms 24.7 ms 5 +---------- Verification ---------- +scalar_O3 PASS diff --git a/test_result/deeplearning/run_results_summary.log b/test_result/deeplearning/run_results_summary.log new file mode 100644 index 00000000..309ac991 --- /dev/null +++ b/test_result/deeplearning/run_results_summary.log @@ -0,0 +1,49 @@ +[Missing] Executable not found for 'dl-model-tinyllama-benchmark' +[Missing] Executable not found for 'dl-model-mobilenetv3-benchmark' +[Failed] Run of 'dl-model-lenet-benchmark' + ↳ stdout/stderr → dl-model-lenet-benchmark.log (may contain errors) +[Missing] Executable not found for 'dl-model-bert-benchmark' +[Missing] Executable not found for 'dl-model-whisper-benchmark' +[Missing] Executable not found for 'dl-model-resnet18-benchmark' +[Failed] Run of 'dl-layer-ffn-benchmark' + ↳ stdout/stderr → dl-layer-ffn-benchmark.log (may contain errors) +[Failed] Run of 'dl-layer-selfattention-benchmark' + ↳ stdout/stderr → dl-layer-selfattention-benchmark.log (may contain errors) +[Failed] Run of 'dl-layer-rmsnorm-benchmark' + ↳ stdout/stderr → dl-layer-rmsnorm-benchmark.log (may contain errors) +[Missing] Executable not found for 'dl-op-linalg-matmul-benchmark' +[Failed] Run of 'dl-op-linalg-conv2d-nchw-fchw-benchmark' + ↳ stdout/stderr → dl-op-linalg-conv2d-nchw-fchw-benchmark.log (may contain errors) +[Failed] Run of 'dl-op-linalg-conv2d-nhwc-hwcf-benchmark' + ↳ stdout/stderr → dl-op-linalg-conv2d-nhwc-hwcf-benchmark.log (may contain errors) +[Failed] Run of 'dl-op-linalg-conv2d-nhwc-fhwc-benchmark' + ↳ stdout/stderr → dl-op-linalg-conv2d-nhwc-fhwc-benchmark.log (may contain errors) +[Failed] Run of 'dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark' + ↳ stdout/stderr → dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.log (may contain errors) +[Failed] Run of 'dl-op-linalg-pooling-nhwc-sum-benchmark' + ↳ stdout/stderr → dl-op-linalg-pooling-nhwc-sum-benchmark.log (may contain errors) +[Missing] Executable not found for 'dl-op-linalg-batch-matmul-benchmark' +[Failed] Run of 'dl-op-linalg-arithaddf-benchmark' + ↳ stdout/stderr → dl-op-linalg-arithaddf-benchmark.log (may contain errors) +[Failed] Run of 'dl-op-linalg-arithdivf-benchmark' + ↳ stdout/stderr → dl-op-linalg-arithdivf-benchmark.log (may contain errors) +[Failed] Run of 'dl-op-linalg-arithmulf-benchmark' + ↳ stdout/stderr → dl-op-linalg-arithmulf-benchmark.log (may contain errors) +[Failed] Run of 'dl-op-linalg-arithnegf-benchmark' + ↳ stdout/stderr → dl-op-linalg-arithnegf-benchmark.log (may contain errors) +[Failed] Run of 'dl-op-linalg-arithsubf-benchmark' + ↳ stdout/stderr → dl-op-linalg-arithsubf-benchmark.log (may contain errors) +[Failed] Run of 'dl-op-linalg-mathfpow-benchmark' + ↳ stdout/stderr → dl-op-linalg-mathfpow-benchmark.log (may contain errors) +[Failed] Run of 'dl-op-linalg-mathrsqrt-benchmark' + ↳ stdout/stderr → dl-op-linalg-mathrsqrt-benchmark.log (may contain errors) +[Failed] Run of 'dl-op-linalg-mathexp-benchmark' + ↳ stdout/stderr → dl-op-linalg-mathexp-benchmark.log (may contain errors) +[Failed] Run of 'dl-op-linalg-reduceaddf-benchmark' + ↳ stdout/stderr → dl-op-linalg-reduceaddf-benchmark.log (may contain errors) +[Failed] Run of 'dl-op-linalg-reducemaxf-benchmark' + ↳ stdout/stderr → dl-op-linalg-reducemaxf-benchmark.log (may contain errors) +[Failed] Run of 'dl-op-linalg-softmax-exp-sum-div-benchmark' + ↳ stdout/stderr → dl-op-linalg-softmax-exp-sum-div-benchmark.log (may contain errors) +[Missing] Executable not found for 'dl-op-tosa-transpose-benchmark' +[Missing] Executable not found for 'dl-op-matmul-transpose-b-benchmark' diff --git a/test_result/geminiprocessing/build.log b/test_result/geminiprocessing/build.log new file mode 100644 index 00000000..aa1b4a29 --- /dev/null +++ b/test_result/geminiprocessing/build.log @@ -0,0 +1,655 @@ +[1/21] Creating directories for 'project_googlebenchmark' +[2/21] Building C object benchmarks/Gemmini/Ops/MatMulOp/CMakeFiles/ExoMatMul.dir/ExoMatmul.c.o +FAILED: benchmarks/Gemmini/Ops/MatMulOp/CMakeFiles/ExoMatMul.dir/ExoMatmul.c.o +riscv64-unknown-linux-gnu-gcc -I/home/buddy-complier-workspace/buddy-mlir/build/cmake/../../frontend/Interfaces -I/home/buddy-complier-workspace/buddy-mlir/build/cmake/../../thirdparty/include -I/home/buddy-complier-workspace/buddy-benchmark/benchmarks -I/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include -I/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/.. -I/home/xychen/buddy-mlir/frontend/Interfaces -O3 -DNDEBUG -MD -MT benchmarks/Gemmini/Ops/MatMulOp/CMakeFiles/ExoMatMul.dir/ExoMatmul.c.o -MF benchmarks/Gemmini/Ops/MatMulOp/CMakeFiles/ExoMatMul.dir/ExoMatmul.c.o.d -o benchmarks/Gemmini/Ops/MatMulOp/CMakeFiles/ExoMatMul.dir/ExoMatmul.c.o -c /home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c: In function '_exo_matmul_4': +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:28:47: error: macro "gemmini_extended_config_ex" requires 7 arguments, but only 6 given + 28 | gemmini_extended_config_ex(WS, 0, 0, 1, 0, 0); + | ^ +In file included from /home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:23: +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:251: note: macro "gemmini_extended_config_ex" defined here + 251 | #define gemmini_extended_config_ex(dataflow, sys_act, sys_shift, relu6_shift, A_stride, A_transpose, B_transpose) \ + | +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:28:3: error: 'gemmini_extended_config_ex' undeclared (first use in this function) + 28 | gemmini_extended_config_ex(WS, 0, 0, 1, 0, 0); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:28:3: note: each undeclared identifier is reported only once for each function it appears in +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:35:18: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast] + 35 | int32_t *res = (int32_t*) ((uint32_t)gemm_acc_malloc (16 * 16 * 4 * 4 * sizeof(int32_t))); + | ^ +In file included from /home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:20, + from /home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:23: +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:66:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 66 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:66:9: note: in expansion of macro 'gemmini_extended_preload' + 66 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:66:119: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 66 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:66:9: note: in expansion of macro 'gemmini_extended_preload' + 66 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:67:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 67 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:67:9: note: in expansion of macro 'gemmini_extended_compute_preloaded' + 67 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:68:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 68 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:68:9: note: in expansion of macro 'gemmini_extended_preload' + 68 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:68:125: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 68 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:68:9: note: in expansion of macro 'gemmini_extended_preload' + 68 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:69:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 69 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:69:9: note: in expansion of macro 'gemmini_extended_compute_preloaded' + 69 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:70:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 70 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:70:9: note: in expansion of macro 'gemmini_extended_preload' + 70 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:70:133: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 70 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:70:9: note: in expansion of macro 'gemmini_extended_preload' + 70 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:71:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 71 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:71:9: note: in expansion of macro 'gemmini_extended_compute_preloaded' + 71 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:72:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 72 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:72:9: note: in expansion of macro 'gemmini_extended_preload' + 72 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:72:133: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 72 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:72:9: note: in expansion of macro 'gemmini_extended_preload' + 72 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:73:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 73 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:73:9: note: in expansion of macro 'gemmini_extended_compute_preloaded' + 73 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:74:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 74 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:74:9: note: in expansion of macro 'gemmini_extended_preload' + 74 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:74:126: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 74 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:74:9: note: in expansion of macro 'gemmini_extended_preload' + 74 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:75:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 75 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + 256)/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:75:9: note: in expansion of macro 'gemmini_extended_compute_preloaded' + 75 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + 256)/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:76:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 76 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:76:9: note: in expansion of macro 'gemmini_extended_preload' + 76 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:76:132: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 76 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:76:9: note: in expansion of macro 'gemmini_extended_preload' + 76 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:77:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 77 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + 256)/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:77:9: note: in expansion of macro 'gemmini_extended_compute_preloaded' + 77 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + 256)/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:78:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 78 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:78:9: note: in expansion of macro 'gemmini_extended_preload' + 78 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:78:140: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 78 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:78:9: note: in expansion of macro 'gemmini_extended_preload' + 78 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:79:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 79 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + 256)/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:79:9: note: in expansion of macro 'gemmini_extended_compute_preloaded' + 79 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + 256)/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:80:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 80 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:80:9: note: in expansion of macro 'gemmini_extended_preload' + 80 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:80:140: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 80 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:80:9: note: in expansion of macro 'gemmini_extended_preload' + 80 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:81:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 81 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + 256)/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:81:9: note: in expansion of macro 'gemmini_extended_compute_preloaded' + 81 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + 256)/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:82:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 82 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:82:9: note: in expansion of macro 'gemmini_extended_preload' + 82 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:82:134: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 82 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:82:9: note: in expansion of macro 'gemmini_extended_preload' + 82 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:83:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 83 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (2) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:83:9: note: in expansion of macro 'gemmini_extended_compute_preloaded' + 83 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (2) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:84:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 84 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:84:9: note: in expansion of macro 'gemmini_extended_preload' + 84 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:84:140: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 84 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:84:9: note: in expansion of macro 'gemmini_extended_preload' + 84 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:85:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 85 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (2) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:85:9: note: in expansion of macro 'gemmini_extended_compute_preloaded' + 85 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (2) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:86:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 86 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:86:9: note: in expansion of macro 'gemmini_extended_preload' + 86 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:86:148: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 86 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:86:9: note: in expansion of macro 'gemmini_extended_preload' + 86 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:87:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 87 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (2) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:87:9: note: in expansion of macro 'gemmini_extended_compute_preloaded' + 87 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (2) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:88:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 88 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:88:9: note: in expansion of macro 'gemmini_extended_preload' + 88 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:88:148: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 88 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:88:9: note: in expansion of macro 'gemmini_extended_preload' + 88 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:89:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 89 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (2) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:89:9: note: in expansion of macro 'gemmini_extended_compute_preloaded' + 89 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (2) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:90:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 90 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:90:9: note: in expansion of macro 'gemmini_extended_preload' + 90 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:90:134: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 90 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:90:9: note: in expansion of macro 'gemmini_extended_preload' + 90 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:91:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 91 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (3) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:91:9: note: in expansion of macro 'gemmini_extended_compute_preloaded' + 91 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (3) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:92:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 92 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:92:9: note: in expansion of macro 'gemmini_extended_preload' + 92 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:92:140: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 92 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:92:9: note: in expansion of macro 'gemmini_extended_preload' + 92 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:93:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 93 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (3) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:93:9: note: in expansion of macro 'gemmini_extended_compute_preloaded' + 93 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (3) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:94:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 94 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:94:9: note: in expansion of macro 'gemmini_extended_preload' + 94 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:94:148: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 94 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:94:9: note: in expansion of macro 'gemmini_extended_preload' + 94 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:95:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 95 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (3) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:95:9: note: in expansion of macro 'gemmini_extended_compute_preloaded' + 95 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (3) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:96:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 96 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:96:9: note: in expansion of macro 'gemmini_extended_preload' + 96 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:96:148: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 96 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:96:9: note: in expansion of macro 'gemmini_extended_preload' + 96 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:97:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 97 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (3) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:97:9: note: in expansion of macro 'gemmini_extended_compute_preloaded' + 97 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (3) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:98:89: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 98 | gemmini_extended_mvout( ((uint64_t) &C[(16 * i + 3136 * io) * (256) + 64 * j]), (uint32_t) &*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16)), (16), (16) ); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:212:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 212 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, dram_addr, ((uint64_t)(rows) << (ADDR_LEN + 16)) | ((uint64_t)(cols) << ADDR_LEN) | (uint64_t)(spad_addr), k_MVOUT) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:98:9: note: in expansion of macro 'gemmini_extended_mvout' + 98 | gemmini_extended_mvout( ((uint64_t) &C[(16 * i + 3136 * io) * (256) + 64 * j]), (uint32_t) &*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16)), (16), (16) ); + | ^~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:99:94: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 99 | gemmini_extended_mvout( ((uint64_t) &C[(16 * i + 3136 * io) * (256) + 16 + 64 * j]), (uint32_t) &*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16)), (16), (16) ); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:212:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 212 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, dram_addr, ((uint64_t)(rows) << (ADDR_LEN + 16)) | ((uint64_t)(cols) << ADDR_LEN) | (uint64_t)(spad_addr), k_MVOUT) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:99:9: note: in expansion of macro 'gemmini_extended_mvout' + 99 | gemmini_extended_mvout( ((uint64_t) &C[(16 * i + 3136 * io) * (256) + 16 + 64 * j]), (uint32_t) &*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16)), (16), (16) ); + | ^~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:100:94: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 100 | gemmini_extended_mvout( ((uint64_t) &C[(16 * i + 3136 * io) * (256) + 32 + 64 * j]), (uint32_t) &*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16)), (16), (16) ); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:212:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 212 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, dram_addr, ((uint64_t)(rows) << (ADDR_LEN + 16)) | ((uint64_t)(cols) << ADDR_LEN) | (uint64_t)(spad_addr), k_MVOUT) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:100:9: note: in expansion of macro 'gemmini_extended_mvout' + 100 | gemmini_extended_mvout( ((uint64_t) &C[(16 * i + 3136 * io) * (256) + 32 + 64 * j]), (uint32_t) &*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16)), (16), (16) ); + | ^~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:101:94: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 101 | gemmini_extended_mvout( ((uint64_t) &C[(16 * i + 3136 * io) * (256) + 48 + 64 * j]), (uint32_t) &*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16)), (16), (16) ); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:212:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 212 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, dram_addr, ((uint64_t)(rows) << (ADDR_LEN + 16)) | ((uint64_t)(cols) << ADDR_LEN) | (uint64_t)(spad_addr), k_MVOUT) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:101:9: note: in expansion of macro 'gemmini_extended_mvout' + 101 | gemmini_extended_mvout( ((uint64_t) &C[(16 * i + 3136 * io) * (256) + 48 + 64 * j]), (uint32_t) &*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16)), (16), (16) ); + | ^~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:105:17: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 105 | gemm_acc_free((uint32_t)(res)); + | ^ +[3/21] Building CXX object benchmarks/Gemmini/ResNet-101/CMakeFiles/CRunnerUtils.dir/CRunnerUtils.cpp.o +[4/21] Generating buddy_matmul.o +[5/21] Performing download step (git clone) for 'project_googlebenchmark' +Cloning into 'project_googlebenchmark'... +HEAD is now at f91b6b4 bump version to 1.6 in preparation for release +[6/21] Generating resnet-101.o +ninja: build stopped: subcommand failed. diff --git a/test_result/geminiprocessing/cmake_configure.log b/test_result/geminiprocessing/cmake_configure.log new file mode 100644 index 00000000..a3a42f37 --- /dev/null +++ b/test_result/geminiprocessing/cmake_configure.log @@ -0,0 +1,37 @@ +-- The CXX compiler identification is GNU 9.2.0 +-- The C compiler identification is GNU 9.2.0 +-- Detecting CXX compiler ABI info +-- Detecting CXX compiler ABI info - done +-- Check for working CXX compiler: /home/buddy-complier-workspace/chipyard/.conda-env/esp-tools/bin/riscv64-unknown-linux-gnu-g++ - skipped +-- Detecting CXX compile features +-- Detecting CXX compile features - done +-- Detecting C compiler ABI info +-- Detecting C compiler ABI info - done +-- Check for working C compiler: /home/buddy-complier-workspace/chipyard/.conda-env/esp-tools/bin/riscv64-unknown-linux-gnu-gcc - skipped +-- Detecting C compile features +-- Detecting C compile features - done +-- Configuring Target Architecture: avx512f +-- Configuring Target Triple: x86_64-unknown-linux-gnu +-- Configuring benchmarks: google +-- Performing Test CMAKE_HAVE_LIBC_PTHREAD +-- Performing Test CMAKE_HAVE_LIBC_PTHREAD - Failed +-- Looking for pthread_create in pthreads +-- Looking for pthread_create in pthreads - not found +-- Looking for pthread_create in pthread +-- Looking for pthread_create in pthread - found +-- Found Threads: TRUE +-- Performing Test HAVE_SSE +-- Performing Test HAVE_SSE - Failed +-- SSE support - no +-- Performing Test HAVE_AVX2 +-- Performing Test HAVE_AVX2 - Failed +-- AVX2 support - no +-- Performing Test HAVE_AVX512 +-- Performing Test HAVE_AVX512 - Failed +-- AVX512 support - no +-- Performing Test HAVE_NEON +-- Performing Test HAVE_NEON - Failed +-- Arm Neon support - no +-- Configuring done +-- Generating done +-- Build files have been written to: /home/buddy-complier-workspace/buddy-benchmark/build diff --git a/test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json new file mode 100644 index 00000000..937dc3e3 --- /dev/null +++ b/test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json @@ -0,0 +1,348 @@ +{ + "context": { + "date": "2025-09-07T14:29:02+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./bin/image-processing-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [3.04053,3.38672,4.34863], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "Eigen_Convolve2D/1", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Eigen_Convolve2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 140, + "real_time": 4.9702270754746030e+00, + "cpu_time": 4.9699425000000002e+00, + "time_unit": "ms" + }, + { + "name": "MLIR_Conv2D/1", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "MLIR_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 92, + "real_time": 7.5326938101130985e+00, + "cpu_time": 7.5326059673913059e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Conv2D/1", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "Buddy_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1614, + "real_time": 4.3082923906517145e-01, + "cpu_time": 4.3081728562577470e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Corr2D_Constant_Padding/1", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "Buddy_Corr2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 624, + "real_time": 1.1141470943888028e+00, + "cpu_time": 1.1141273092948720e+00, + "time_unit": "ms" + }, + { + "name": "OpenCV_Filter2D_Constant_Padding/1", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "OpenCV_Filter2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 359, + "real_time": 1.9465408347609316e+00, + "cpu_time": 1.9465103370473527e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 5, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4753, + "real_time": 1.4783761252635608e-01, + "cpu_time": 1.4782938796549550e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2588, + "real_time": 2.7286772899778772e-01, + "cpu_time": 2.7285900193199364e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 7, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 101129, + "real_time": 6.9313498568830253e-03, + "cpu_time": 6.9310267381265502e-03, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "family_index": 8, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 47932, + "real_time": 1.4616750139684954e-02, + "cpu_time": 1.4616182466828002e-02, + "time_unit": "ms" + }, + { + "name": "Buddy_Erosion2D_Constant_Padding/1", + "family_index": 9, + "per_family_instance_index": 0, + "run_name": "Buddy_Erosion2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1876, + "real_time": 3.1442655476807024e-01, + "cpu_time": 3.1440550479744134e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Dilation2D_Constant_Padding/1", + "family_index": 10, + "per_family_instance_index": 0, + "run_name": "Buddy_Dilation2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2598, + "real_time": 3.2269134486611756e-01, + "cpu_time": 3.2267296497305642e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Opening2D_Constant_Padding/1", + "family_index": 11, + "per_family_instance_index": 0, + "run_name": "Buddy_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1000, + "real_time": 5.9975032135844231e-01, + "cpu_time": 5.9970746099999950e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Closing2D_Constant_Padding/1", + "family_index": 12, + "per_family_instance_index": 0, + "run_name": "Buddy_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1107, + "real_time": 5.4013467281922201e-01, + "cpu_time": 5.4010315356820238e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_TopHat2D_Constant_Padding/1", + "family_index": 13, + "per_family_instance_index": 0, + "run_name": "Buddy_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 670, + "real_time": 9.5614417275386065e-01, + "cpu_time": 9.5608898805969977e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_BottomHat2D_Constant_Padding/1", + "family_index": 14, + "per_family_instance_index": 0, + "run_name": "Buddy_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 701, + "real_time": 9.5555469521272196e-01, + "cpu_time": 9.5552639372325310e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Erode2D_Constant_Padding/1", + "family_index": 15, + "per_family_instance_index": 0, + "run_name": "OpenCV_Erode2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4869, + "real_time": 1.4373984491587616e-01, + "cpu_time": 1.4373398007804475e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Opening2D_Constant_Padding/1", + "family_index": 16, + "per_family_instance_index": 0, + "run_name": "OpenCV_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3020, + "real_time": 2.3255034763094606e-01, + "cpu_time": 2.3254246953642435e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Closing2D_Constant_Padding/1", + "family_index": 17, + "per_family_instance_index": 0, + "run_name": "OpenCV_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3017, + "real_time": 2.3421093841281837e-01, + "cpu_time": 2.3420040371229658e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_TopHat2D_Constant_Padding/1", + "family_index": 18, + "per_family_instance_index": 0, + "run_name": "OpenCV_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2570, + "real_time": 2.7231803663973680e-01, + "cpu_time": 2.7231148404669192e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_BottomHat2D_Constant_Padding/1", + "family_index": 19, + "per_family_instance_index": 0, + "run_name": "OpenCV_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2568, + "real_time": 2.7264940416051592e-01, + "cpu_time": 2.7263315109034342e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "family_index": 20, + "per_family_instance_index": 0, + "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2637, + "real_time": 2.6645213591586836e-01, + "cpu_time": 2.6643527948426282e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Dilate2D_Constant_Padding/1", + "family_index": 21, + "per_family_instance_index": 0, + "run_name": "OpenCV_Dilate2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4955, + "real_time": 1.4140665997289625e-01, + "cpu_time": 1.4139799172553003e-01, + "time_unit": "ms" + } + ] +} diff --git a/test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log b/test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log new file mode 100644 index 00000000..0aab2d9d --- /dev/null +++ b/test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log @@ -0,0 +1,56 @@ +2025-09-07T14:29:02+00:00 +Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 3.04, 3.39, 4.35 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 4.97 ms 4.97 ms 140 +MLIR_Conv2D/1 7.53 ms 7.53 ms 92 +Buddy_Conv2D/1 0.431 ms 0.431 ms 1614 +Buddy_Corr2D_Constant_Padding/1 1.11 ms 1.11 ms 624 +OpenCV_Filter2D_Constant_Padding/1 1.95 ms 1.95 ms 359 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.148 ms 0.148 ms 4753 +Buddy_Resize2D_Bilinear_Interpolation/1 0.273 ms 0.273 ms 2588 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 101129 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47932 +Buddy_Erosion2D_Constant_Padding/1 0.314 ms 0.314 ms 1876 +Buddy_Dilation2D_Constant_Padding/1 0.323 ms 0.323 ms 2598 +Buddy_Opening2D_Constant_Padding/1 0.600 ms 0.600 ms 1000 +Buddy_Closing2D_Constant_Padding/1 0.540 ms 0.540 ms 1107 +Buddy_TopHat2D_Constant_Padding/1 0.956 ms 0.956 ms 670 +Buddy_BottomHat2D_Constant_Padding/1 0.956 ms 0.956 ms 701 +OpenCV_Erode2D_Constant_Padding/1 0.144 ms 0.144 ms 4869 +OpenCV_Opening2D_Constant_Padding/1 0.233 ms 0.233 ms 3020 +OpenCV_Closing2D_Constant_Padding/1 0.234 ms 0.234 ms 3017 +OpenCV_TopHat2D_Constant_Padding/1 0.272 ms 0.272 ms 2570 +OpenCV_BottomHat2D_Constant_Padding/1 0.273 ms 0.273 ms 2568 +OpenCV_MorphGrad2D_Constant_Padding/1 0.266 ms 0.266 ms 2637 +OpenCV_Dilate2D_Constant_Padding/1 0.141 ms 0.141 ms 4955 +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. diff --git a/test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json new file mode 100644 index 00000000..825c9e79 --- /dev/null +++ b/test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json @@ -0,0 +1,349 @@ +{ + "context": { + "date": "2025-09-07T14:29:26+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./bin/image-processing-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [3.02539,3.35449,4.31201], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "Eigen_Convolve2D/1", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Eigen_Convolve2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 109, + "real_time": 4.9761295523665368e+00, + "cpu_time": 4.9760039174311919e+00, + "time_unit": "ms" + }, + { + "name": "MLIR_Conv2D/1", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "MLIR_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 93, + "real_time": 7.5348360083436452e+00, + "cpu_time": 7.5346214623655925e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Conv2D/1", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "Buddy_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1613, + "real_time": 4.3241579896237492e-01, + "cpu_time": 4.3240159950402979e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Corr2D_Constant_Padding/1", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "Buddy_Corr2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 621, + "real_time": 1.1201563460719375e+00, + "cpu_time": 1.1201345571658614e+00, + "time_unit": "ms" + }, + { + "name": "OpenCV_Filter2D_Constant_Padding/1", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "OpenCV_Filter2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 358, + "real_time": 1.9431075591115312e+00, + "cpu_time": 1.9430616452513958e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 5, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4730, + "real_time": 1.4787029561608336e-01, + "cpu_time": 1.4785987547568707e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2582, + "real_time": 2.7276169563521718e-01, + "cpu_time": 2.7275317970565449e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 7, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 100345, + "real_time": 6.9529070208036907e-03, + "cpu_time": 6.9522148388061148e-03, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "family_index": 8, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 47937, + "real_time": 1.4593899464516223e-02, + "cpu_time": 1.4593418570206736e-02, + "time_unit": "ms" + }, + { + "name": "Buddy_Erosion2D_Constant_Padding/1", + "family_index": 9, + "per_family_instance_index": 0, + "run_name": "Buddy_Erosion2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2390, + "real_time": 2.9568342203625075e-01, + "cpu_time": 2.9566401338912135e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Dilation2D_Constant_Padding/1", + "family_index": 10, + "per_family_instance_index": 0, + "run_name": "Buddy_Dilation2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2256, + "real_time": 2.9021860859918258e-01, + "cpu_time": 2.9020803501773085e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Opening2D_Constant_Padding/1", + "family_index": 11, + "per_family_instance_index": 0, + "run_name": "Buddy_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 882, + "real_time": 6.7151636898923084e-01, + "cpu_time": 6.7144004875283347e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Closing2D_Constant_Padding/1", + "family_index": 12, + "per_family_instance_index": 0, + "run_name": "Buddy_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1000, + "real_time": 6.1067149415612221e-01, + "cpu_time": 6.1064459099999979e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_TopHat2D_Constant_Padding/1", + "family_index": 13, + "per_family_instance_index": 0, + "run_name": "Buddy_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 460, + "real_time": 1.3535791765088621e+00, + "cpu_time": 1.3534489391304345e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_BottomHat2D_Constant_Padding/1", + "family_index": 14, + "per_family_instance_index": 0, + "run_name": "Buddy_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 753, + "real_time": 9.4793929877984096e-01, + "cpu_time": 9.4790966268260402e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Erode2D_Constant_Padding/1", + "family_index": 15, + "per_family_instance_index": 0, + "run_name": "OpenCV_Erode2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4882, + "real_time": 1.4292015489961513e-01, + "cpu_time": 1.4291047501024184e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Opening2D_Constant_Padding/1", + "family_index": 16, + "per_family_instance_index": 0, + "run_name": "OpenCV_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3137, + "real_time": 2.2372837479151775e-01, + "cpu_time": 2.2371705769843761e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Closing2D_Constant_Padding/1", + "family_index": 17, + "per_family_instance_index": 0, + "run_name": "OpenCV_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3091, + "real_time": 2.2564606758546227e-01, + "cpu_time": 2.2562969718537662e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_TopHat2D_Constant_Padding/1", + "family_index": 18, + "per_family_instance_index": 0, + "run_name": "OpenCV_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2642, + "real_time": 2.6471110273725063e-01, + "cpu_time": 2.6470357607872869e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_BottomHat2D_Constant_Padding/1", + "family_index": 19, + "per_family_instance_index": 0, + "run_name": "OpenCV_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2626, + "real_time": 2.7250474842331923e-01, + "cpu_time": 2.7248917098248282e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "family_index": 20, + "per_family_instance_index": 0, + "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2667, + "real_time": 2.6142182346135878e-01, + "cpu_time": 2.6141412335957992e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Dilate2D_Constant_Padding/1", + "family_index": 21, + "per_family_instance_index": 0, + "run_name": "OpenCV_Dilate2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4904, + "real_time": 1.4250244045870145e-01, + "cpu_time": 1.4249685929853156e-01, + "time_unit": "ms" + } + ] +} + diff --git a/test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log b/test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log new file mode 100644 index 00000000..97464ce6 --- /dev/null +++ b/test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log @@ -0,0 +1,56 @@ +2025-09-07T14:29:26+00:00 +Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 3.03, 3.35, 4.31 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 4.98 ms 4.98 ms 109 +MLIR_Conv2D/1 7.53 ms 7.53 ms 93 +Buddy_Conv2D/1 0.432 ms 0.432 ms 1613 +Buddy_Corr2D_Constant_Padding/1 1.12 ms 1.12 ms 621 +OpenCV_Filter2D_Constant_Padding/1 1.94 ms 1.94 ms 358 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.148 ms 0.148 ms 4730 +Buddy_Resize2D_Bilinear_Interpolation/1 0.273 ms 0.273 ms 2582 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 100345 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47937 +Buddy_Erosion2D_Constant_Padding/1 0.296 ms 0.296 ms 2390 +Buddy_Dilation2D_Constant_Padding/1 0.290 ms 0.290 ms 2256 +Buddy_Opening2D_Constant_Padding/1 0.672 ms 0.671 ms 882 +Buddy_Closing2D_Constant_Padding/1 0.611 ms 0.611 ms 1000 +Buddy_TopHat2D_Constant_Padding/1 1.35 ms 1.35 ms 460 +Buddy_BottomHat2D_Constant_Padding/1 0.948 ms 0.948 ms 753 +OpenCV_Erode2D_Constant_Padding/1 0.143 ms 0.143 ms 4882 +OpenCV_Opening2D_Constant_Padding/1 0.224 ms 0.224 ms 3137 +OpenCV_Closing2D_Constant_Padding/1 0.226 ms 0.226 ms 3091 +OpenCV_TopHat2D_Constant_Padding/1 0.265 ms 0.265 ms 2642 +OpenCV_BottomHat2D_Constant_Padding/1 0.273 ms 0.272 ms 2626 +OpenCV_MorphGrad2D_Constant_Padding/1 0.261 ms 0.261 ms 2667 +OpenCV_Dilate2D_Constant_Padding/1 0.143 ms 0.142 ms 4904 +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. diff --git a/test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json new file mode 100644 index 00000000..1dc67624 --- /dev/null +++ b/test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json @@ -0,0 +1,348 @@ +{ + "context": { + "date": "2025-09-07T14:29:50+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./bin/image-processing-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [3.01562,3.32471,4.27539], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "Eigen_Convolve2D/1", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Eigen_Convolve2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 52, + "real_time": 1.1901946451801519e+01, + "cpu_time": 1.1901702288461539e+01, + "time_unit": "ms" + }, + { + "name": "MLIR_Conv2D/1", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "MLIR_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 23, + "real_time": 3.0438764263754305e+01, + "cpu_time": 3.0438311043478276e+01, + "time_unit": "ms" + }, + { + "name": "Buddy_Conv2D/1", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "Buddy_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 637, + "real_time": 1.1142698910490871e+00, + "cpu_time": 1.1142385604395602e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Corr2D_Constant_Padding/1", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "Buddy_Corr2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 375, + "real_time": 1.8523898224035900e+00, + "cpu_time": 1.8523744240000006e+00, + "time_unit": "ms" + }, + { + "name": "OpenCV_Filter2D_Constant_Padding/1", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "OpenCV_Filter2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 249, + "real_time": 2.7968978965617568e+00, + "cpu_time": 2.7968237389558248e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 5, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4716, + "real_time": 1.4841855141042753e-01, + "cpu_time": 1.4841650890585228e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2583, + "real_time": 2.7219368201838046e-01, + "cpu_time": 2.7218788850174197e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 7, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 100958, + "real_time": 6.9158962019867246e-03, + "cpu_time": 6.9157541452881457e-03, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "family_index": 8, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 47295, + "real_time": 1.4739391594914103e-02, + "cpu_time": 1.4739279247277717e-02, + "time_unit": "ms" + }, + { + "name": "Buddy_Erosion2D_Constant_Padding/1", + "family_index": 9, + "per_family_instance_index": 0, + "run_name": "Buddy_Erosion2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1902, + "real_time": 3.5209773396279909e-01, + "cpu_time": 3.5206602313354285e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Dilation2D_Constant_Padding/1", + "family_index": 10, + "per_family_instance_index": 0, + "run_name": "Buddy_Dilation2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2372, + "real_time": 2.8494088541446205e-01, + "cpu_time": 2.8493101180438485e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Opening2D_Constant_Padding/1", + "family_index": 11, + "per_family_instance_index": 0, + "run_name": "Buddy_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1419, + "real_time": 5.2841257374836070e-01, + "cpu_time": 5.2840475828047939e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Closing2D_Constant_Padding/1", + "family_index": 12, + "per_family_instance_index": 0, + "run_name": "Buddy_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1000, + "real_time": 5.3209472447633743e-01, + "cpu_time": 5.3207190200000021e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_TopHat2D_Constant_Padding/1", + "family_index": 13, + "per_family_instance_index": 0, + "run_name": "Buddy_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 726, + "real_time": 9.2559484841902395e-01, + "cpu_time": 9.2558814462809835e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_BottomHat2D_Constant_Padding/1", + "family_index": 14, + "per_family_instance_index": 0, + "run_name": "Buddy_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 722, + "real_time": 9.3495421152861169e-01, + "cpu_time": 9.3492223961218790e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Erode2D_Constant_Padding/1", + "family_index": 15, + "per_family_instance_index": 0, + "run_name": "OpenCV_Erode2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4207, + "real_time": 1.6627396342446635e-01, + "cpu_time": 1.6627164606608016e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Opening2D_Constant_Padding/1", + "family_index": 16, + "per_family_instance_index": 0, + "run_name": "OpenCV_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2721, + "real_time": 2.5683478785980629e-01, + "cpu_time": 2.5682696435134150e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Closing2D_Constant_Padding/1", + "family_index": 17, + "per_family_instance_index": 0, + "run_name": "OpenCV_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2682, + "real_time": 2.5946399131699521e-01, + "cpu_time": 2.5946211446681705e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_TopHat2D_Constant_Padding/1", + "family_index": 18, + "per_family_instance_index": 0, + "run_name": "OpenCV_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2392, + "real_time": 2.9280804172407426e-01, + "cpu_time": 2.9279882984949784e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_BottomHat2D_Constant_Padding/1", + "family_index": 19, + "per_family_instance_index": 0, + "run_name": "OpenCV_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2392, + "real_time": 2.9102603851951486e-01, + "cpu_time": 2.9099642642140494e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "family_index": 20, + "per_family_instance_index": 0, + "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2374, + "real_time": 2.9361070696305303e-01, + "cpu_time": 2.9360214321819761e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Dilate2D_Constant_Padding/1", + "family_index": 21, + "per_family_instance_index": 0, + "run_name": "OpenCV_Dilate2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4301, + "real_time": 1.6287719699649639e-01, + "cpu_time": 1.6287602022785452e-01, + "time_unit": "ms" + } + ] +} diff --git a/test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log b/test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log new file mode 100644 index 00000000..09a24a36 --- /dev/null +++ b/test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log @@ -0,0 +1,56 @@ +2025-09-07T14:29:50+00:00 +Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 3.02, 3.32, 4.28 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 11.9 ms 11.9 ms 52 +MLIR_Conv2D/1 30.4 ms 30.4 ms 23 +Buddy_Conv2D/1 1.11 ms 1.11 ms 637 +Buddy_Corr2D_Constant_Padding/1 1.85 ms 1.85 ms 375 +OpenCV_Filter2D_Constant_Padding/1 2.80 ms 2.80 ms 249 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.148 ms 0.148 ms 4716 +Buddy_Resize2D_Bilinear_Interpolation/1 0.272 ms 0.272 ms 2583 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 100958 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47295 +Buddy_Erosion2D_Constant_Padding/1 0.352 ms 0.352 ms 1902 +Buddy_Dilation2D_Constant_Padding/1 0.285 ms 0.285 ms 2372 +Buddy_Opening2D_Constant_Padding/1 0.528 ms 0.528 ms 1419 +Buddy_Closing2D_Constant_Padding/1 0.532 ms 0.532 ms 1000 +Buddy_TopHat2D_Constant_Padding/1 0.926 ms 0.926 ms 726 +Buddy_BottomHat2D_Constant_Padding/1 0.935 ms 0.935 ms 722 +OpenCV_Erode2D_Constant_Padding/1 0.166 ms 0.166 ms 4207 +OpenCV_Opening2D_Constant_Padding/1 0.257 ms 0.257 ms 2721 +OpenCV_Closing2D_Constant_Padding/1 0.259 ms 0.259 ms 2682 +OpenCV_TopHat2D_Constant_Padding/1 0.293 ms 0.293 ms 2392 +OpenCV_BottomHat2D_Constant_Padding/1 0.291 ms 0.291 ms 2392 +OpenCV_MorphGrad2D_Constant_Padding/1 0.294 ms 0.294 ms 2374 +OpenCV_Dilate2D_Constant_Padding/1 0.163 ms 0.163 ms 4301 +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. diff --git a/test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json new file mode 100644 index 00000000..07ef45e8 --- /dev/null +++ b/test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json @@ -0,0 +1,348 @@ +{ + "context": { + "date": "2025-09-07T14:30:14+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./bin/image-processing-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [3.08984,3.31445,4.24609], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "Eigen_Convolve2D/1", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Eigen_Convolve2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 58, + "real_time": 1.1998113887063388e+01, + "cpu_time": 1.1997172689655171e+01, + "time_unit": "ms" + }, + { + "name": "MLIR_Conv2D/1", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "MLIR_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 23, + "real_time": 3.0430977273246516e+01, + "cpu_time": 3.0430230913043474e+01, + "time_unit": "ms" + }, + { + "name": "Buddy_Conv2D/1", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "Buddy_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 694, + "real_time": 9.3945750284950735e-01, + "cpu_time": 9.3939238472622510e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Corr2D_Constant_Padding/1", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "Buddy_Corr2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 379, + "real_time": 1.8339350839246231e+00, + "cpu_time": 1.8338942585751978e+00, + "time_unit": "ms" + }, + { + "name": "OpenCV_Filter2D_Constant_Padding/1", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "OpenCV_Filter2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 249, + "real_time": 2.7970489727924148e+00, + "cpu_time": 2.7968971004016070e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 5, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4735, + "real_time": 1.4771520226029433e-01, + "cpu_time": 1.4771250242872222e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2581, + "real_time": 2.7436091312370020e-01, + "cpu_time": 2.7433653428903537e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 7, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 101032, + "real_time": 6.9237584291694564e-03, + "cpu_time": 6.9233851156069301e-03, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "family_index": 8, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 47897, + "real_time": 1.4611094101790878e-02, + "cpu_time": 1.4609948556276994e-02, + "time_unit": "ms" + }, + { + "name": "Buddy_Erosion2D_Constant_Padding/1", + "family_index": 9, + "per_family_instance_index": 0, + "run_name": "Buddy_Erosion2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2315, + "real_time": 3.1752810043083668e-01, + "cpu_time": 3.1751235248380194e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Dilation2D_Constant_Padding/1", + "family_index": 10, + "per_family_instance_index": 0, + "run_name": "Buddy_Dilation2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1991, + "real_time": 3.0030406326699172e-01, + "cpu_time": 3.0026843294826744e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Opening2D_Constant_Padding/1", + "family_index": 11, + "per_family_instance_index": 0, + "run_name": "Buddy_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1162, + "real_time": 5.2669528301119184e-01, + "cpu_time": 5.2667063166953465e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Closing2D_Constant_Padding/1", + "family_index": 12, + "per_family_instance_index": 0, + "run_name": "Buddy_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1147, + "real_time": 5.8120846059806675e-01, + "cpu_time": 5.8116856669572725e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_TopHat2D_Constant_Padding/1", + "family_index": 13, + "per_family_instance_index": 0, + "run_name": "Buddy_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 742, + "real_time": 9.3807371438674203e-01, + "cpu_time": 9.3802853369272166e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_BottomHat2D_Constant_Padding/1", + "family_index": 14, + "per_family_instance_index": 0, + "run_name": "Buddy_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 731, + "real_time": 9.1354743094679103e-01, + "cpu_time": 9.1349827770177838e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Erode2D_Constant_Padding/1", + "family_index": 15, + "per_family_instance_index": 0, + "run_name": "OpenCV_Erode2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4901, + "real_time": 1.4285111738647449e-01, + "cpu_time": 1.4284396123240140e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Opening2D_Constant_Padding/1", + "family_index": 16, + "per_family_instance_index": 0, + "run_name": "OpenCV_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2990, + "real_time": 2.3436977786563312e-01, + "cpu_time": 2.3436707424749126e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Closing2D_Constant_Padding/1", + "family_index": 17, + "per_family_instance_index": 0, + "run_name": "OpenCV_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2977, + "real_time": 2.3498714375624050e-01, + "cpu_time": 2.3497187302653738e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_TopHat2D_Constant_Padding/1", + "family_index": 18, + "per_family_instance_index": 0, + "run_name": "OpenCV_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2568, + "real_time": 2.7303778743400381e-01, + "cpu_time": 2.7303231347352053e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_BottomHat2D_Constant_Padding/1", + "family_index": 19, + "per_family_instance_index": 0, + "run_name": "OpenCV_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2572, + "real_time": 2.6648068832433430e-01, + "cpu_time": 2.6646258125972005e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "family_index": 20, + "per_family_instance_index": 0, + "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2624, + "real_time": 2.6757532884026081e-01, + "cpu_time": 2.6757154878048883e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Dilate2D_Constant_Padding/1", + "family_index": 21, + "per_family_instance_index": 0, + "run_name": "OpenCV_Dilate2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4938, + "real_time": 1.4144324261126320e-01, + "cpu_time": 1.4143231227217509e-01, + "time_unit": "ms" + } + ] +} diff --git a/test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log b/test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log new file mode 100644 index 00000000..e95c5e72 --- /dev/null +++ b/test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log @@ -0,0 +1,57 @@ +2025-09-07T14:30:14+00:00 +Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 3.09, 3.31, 4.25 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 12.0 ms 12.0 ms 58 +MLIR_Conv2D/1 30.4 ms 30.4 ms 23 +Buddy_Conv2D/1 0.939 ms 0.939 ms 694 +Buddy_Corr2D_Constant_Padding/1 1.83 ms 1.83 ms 379 +OpenCV_Filter2D_Constant_Padding/1 2.80 ms 2.80 ms 249 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.148 ms 0.148 ms 4735 +Buddy_Resize2D_Bilinear_Interpolation/1 0.274 ms 0.274 ms 2581 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 101032 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47897 +Buddy_Erosion2D_Constant_Padding/1 0.318 ms 0.318 ms 2315 +Buddy_Dilation2D_Constant_Padding/1 0.300 ms 0.300 ms 1991 +Buddy_Opening2D_Constant_Padding/1 0.527 ms 0.527 ms 1162 +Buddy_Closing2D_Constant_Padding/1 0.581 ms 0.581 ms 1147 +Buddy_TopHat2D_Constant_Padding/1 0.938 ms 0.938 ms 742 +Buddy_BottomHat2D_Constant_Padding/1 0.914 ms 0.913 ms 731 +OpenCV_Erode2D_Constant_Padding/1 0.143 ms 0.143 ms 4901 +OpenCV_Opening2D_Constant_Padding/1 0.234 ms 0.234 ms 2990 +OpenCV_Closing2D_Constant_Padding/1 0.235 ms 0.235 ms 2977 +OpenCV_TopHat2D_Constant_Padding/1 0.273 ms 0.273 ms 2568 +OpenCV_BottomHat2D_Constant_Padding/1 0.266 ms 0.266 ms 2572 +OpenCV_MorphGrad2D_Constant_Padding/1 0.268 ms 0.268 ms 2624 +OpenCV_Dilate2D_Constant_Padding/1 0.141 ms 0.141 ms 4938 +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +ERROR: Can't save PNG file. +ERROR: Can't save PNG file. +ERROR: Can't save PNG file. +ERROR: Can't save PNG file. +ERROR: Can't save PNG file. +ERROR: Can't save PNG file. +ERROR: Can't save PNG file. +Exception converting image to PNG format. +ERROR: Can't save PNG file. diff --git a/test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json new file mode 100644 index 00000000..97022f6f --- /dev/null +++ b/test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json @@ -0,0 +1,348 @@ +{ + "context": { + "date": "2025-09-07T14:25:01+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./bin/image-processing-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [4.38672,3.83301,4.74805], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "Eigen_Convolve2D/1", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Eigen_Convolve2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 136, + "real_time": 5.0944548702853565e+00, + "cpu_time": 5.0941901617647067e+00, + "time_unit": "ms" + }, + { + "name": "MLIR_Conv2D/1", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "MLIR_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 92, + "real_time": 7.5879288108452503e+00, + "cpu_time": 7.5877727717391323e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Conv2D/1", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "Buddy_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2126, + "real_time": 3.2648893102989485e-01, + "cpu_time": 3.2648295437441222e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Corr2D_Constant_Padding/1", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "Buddy_Corr2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 810, + "real_time": 8.6173117160797119e-01, + "cpu_time": 8.6171702345678991e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Filter2D_Constant_Padding/1", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "OpenCV_Filter2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 532, + "real_time": 1.3098518987347310e+00, + "cpu_time": 1.3098337011278192e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 5, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4693, + "real_time": 1.4864658349845142e-01, + "cpu_time": 1.4864443596846369e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2574, + "real_time": 2.7232925843942418e-01, + "cpu_time": 2.7232468376068386e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 7, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 101147, + "real_time": 6.9248195898291617e-03, + "cpu_time": 6.9247274758519840e-03, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "family_index": 8, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 47876, + "real_time": 1.4619754012793923e-02, + "cpu_time": 1.4619547852786375e-02, + "time_unit": "ms" + }, + { + "name": "Buddy_Erosion2D_Constant_Padding/1", + "family_index": 9, + "per_family_instance_index": 0, + "run_name": "Buddy_Erosion2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1964, + "real_time": 3.4232623116426214e-01, + "cpu_time": 3.4231908757637480e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Dilation2D_Constant_Padding/1", + "family_index": 10, + "per_family_instance_index": 0, + "run_name": "Buddy_Dilation2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1987, + "real_time": 3.5313764707971351e-01, + "cpu_time": 3.5311560694514321e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Opening2D_Constant_Padding/1", + "family_index": 11, + "per_family_instance_index": 0, + "run_name": "Buddy_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1000, + "real_time": 7.3933277651667595e-01, + "cpu_time": 7.3929136599999978e-01, + "time_unit": "ms" + ,, + { + "name": "Buddy_Closing2D_Constant_Padding/1", + "family_index": 12, + "per_family_instance_index": 0, + "run_name": "Buddy_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1338, + "real_time": 6.8773021879752116e-01, + "cpu_time": 6.8761599701046450e-01, + "time_unit": "ms" + ,, + { + "name": "Buddy_TopHat2D_Constant_Padding/1", + "family_index": 13, + "per_family_instance_index": 0, + "run_name": "Buddy_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 449, + "real_time": 1.3737666746290329e+00, + "cpu_time": 1.3737399821826268e+00, + "time_unit": "ms" + ,, + { + "name": "Buddy_BottomHat2D_Constant_Padding/1", + "family_index": 14, + "per_family_instance_index": 0, + "run_name": "Buddy_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 451, + "real_time": 1.1508310349976145e+00, + "cpu_time": 1.1508074767184044e+00, + "time_unit": "ms" + ,, + { + "name": "OpenCV_Erode2D_Constant_Padding/1", + "family_index": 15, + "per_family_instance_index": 0, + "run_name": "OpenCV_Erode2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4246, + "real_time": 1.6527650926088422e-01, + "cpu_time": 1.6527523033443248e-01, + "time_unit": "ms" + ,, + { + "name": "OpenCV_Opening2D_Constant_Padding/1", + "family_index": 16, + "per_family_instance_index": 0, + "run_name": "OpenCV_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2717, + "real_time": 2.5677720045709945e-01, + "cpu_time": 2.5677142694147959e-01, + "time_unit": "ms" + ,, + { + "name": "OpenCV_Closing2D_Constant_Padding/1", + "family_index": 17, + "per_family_instance_index": 0, + "run_name": "OpenCV_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2783, + "real_time": 2.5039162999470799e-01, + "cpu_time": 2.5038794178943619e-01, + "time_unit": "ms" + ,, + { + "name": "OpenCV_TopHat2D_Constant_Padding/1", + "family_index": 18, + "per_family_instance_index": 0, + "run_name": "OpenCV_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2439, + "real_time": 2.8731209325614449e-01, + "cpu_time": 2.8730276875768884e-01, + "time_unit": "ms" + ,, + { + "name": "OpenCV_BottomHat2D_Constant_Padding/1", + "family_index": 19, + "per_family_instance_index": 0, + "run_name": "OpenCV_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2429, + "real_time": 2.8720653292670473e-01, + "cpu_time": 2.8719969699464831e-01, + "time_unit": "ms" + ,, + { + "name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "family_index": 20, + "per_family_instance_index": 0, + "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2460, + "real_time": 2.8502542555816773e-01, + "cpu_time": 2.8502001626016238e-01, + "time_unit": "ms" + ,, + { + "name": "OpenCV_Dilate2D_Constant_Padding/1", + "family_index": 21, + "per_family_instance_index": 0, + "run_name": "OpenCV_Dilate2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4294, + "real_time": 1.6273279480452199e-01, + "cpu_time": 1.6272675011644155e-01, + "time_unit": "ms" + } + ] +} diff --git a/test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log b/test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log new file mode 100644 index 00000000..d7b44d35 --- /dev/null +++ b/test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log @@ -0,0 +1,56 @@ +2025-09-07T14:25:01+00:00 +Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 4.39, 3.83, 4.75 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 5.09 ms 5.09 ms 136 +MLIR_Conv2D/1 7.59 ms 7.59 ms 92 +Buddy_Conv2D/1 0.326 ms 0.326 ms 2126 +Buddy_Corr2D_Constant_Padding/1 0.862 ms 0.862 ms 810 +OpenCV_Filter2D_Constant_Padding/1 1.31 ms 1.31 ms 532 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.149 ms 0.149 ms 4693 +Buddy_Resize2D_Bilinear_Interpolation/1 0.272 ms 0.272 ms 2574 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 101147 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47876 +Buddy_Erosion2D_Constant_Padding/1 0.342 ms 0.342 ms 1964 +Buddy_Dilation2D_Constant_Padding/1 0.353 ms 0.353 ms 1987 +Buddy_Opening2D_Constant_Padding/1 0.739 ms 0.739 ms 1000 +Buddy_Closing2D_Constant_Padding/1 0.688 ms 0.688 ms 1338 +Buddy_TopHat2D_Constant_Padding/1 1.37 ms 1.37 ms 449 +Buddy_BottomHat2D_Constant_Padding/1 1.15 ms 1.15 ms 451 +OpenCV_Erode2D_Constant_Padding/1 0.165 ms 0.165 ms 4246 +OpenCV_Opening2D_Constant_Padding/1 0.257 ms 0.257 ms 2717 +OpenCV_Closing2D_Constant_Padding/1 0.250 ms 0.250 ms 2783 +OpenCV_TopHat2D_Constant_Padding/1 0.287 ms 0.287 ms 2439 +OpenCV_BottomHat2D_Constant_Padding/1 0.287 ms 0.287 ms 2429 +OpenCV_MorphGrad2D_Constant_Padding/1 0.285 ms 0.285 ms 2460 +OpenCV_Dilate2D_Constant_Padding/1 0.163 ms 0.163 ms 4294 +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. diff --git a/test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json new file mode 100644 index 00000000..16b026ca --- /dev/null +++ b/test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json @@ -0,0 +1,348 @@ +{ + "context": { + "date": "2025-09-07T14:25:25+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./bin/image-processing-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [3.9126,3.76514,4.7002], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "Eigen_Convolve2D/1", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Eigen_Convolve2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 140, + "real_time": 4.9801771395972798e+00, + "cpu_time": 4.9799727642857139e+00, + "time_unit": "ms" + }, + { + "name": "MLIR_Conv2D/1", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "MLIR_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 92, + "real_time": 7.5637138570132461e+00, + "cpu_time": 7.5633655869565199e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Conv2D/1", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "Buddy_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1928, + "real_time": 3.6541006771298862e-01, + "cpu_time": 3.6539922977178435e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Corr2D_Constant_Padding/1", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "Buddy_Corr2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 814, + "real_time": 8.6126693453484149e-01, + "cpu_time": 8.6124013022113044e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Filter2D_Constant_Padding/1", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "OpenCV_Filter2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 533, + "real_time": 1.3084959091992583e+00, + "cpu_time": 1.3084598105065661e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 5, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4720, + "real_time": 1.4866878815247850e-01, + "cpu_time": 1.4866191355932201e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2570, + "real_time": 2.7357344062643757e-01, + "cpu_time": 2.7355898560311276e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 7, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 100923, + "real_time": 6.9336138939047895e-03, + "cpu_time": 6.9332814323791450e-03, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "family_index": 8, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 47371, + "real_time": 1.4785069694910813e-02, + "cpu_time": 1.4784267484325859e-02, + "time_unit": "ms" + }, + { + "name": "Buddy_Erosion2D_Constant_Padding/1", + "family_index": 9, + "per_family_instance_index": 0, + "run_name": "Buddy_Erosion2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1930, + "real_time": 3.5314525771944016e-01, + "cpu_time": 3.5312864974093228e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Dilation2D_Constant_Padding/1", + "family_index": 10, + "per_family_instance_index": 0, + "run_name": "Buddy_Dilation2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2161, + "real_time": 3.0299739835439926e-01, + "cpu_time": 3.0298353354928237e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Opening2D_Constant_Padding/1", + "family_index": 11, + "per_family_instance_index": 0, + "run_name": "Buddy_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1325, + "real_time": 5.3083810322689562e-01, + "cpu_time": 5.3080599698113229e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Closing2D_Constant_Padding/1", + "family_index": 12, + "per_family_instance_index": 0, + "run_name": "Buddy_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1097, + "real_time": 5.6190424706572928e-01, + "cpu_time": 5.6186523518687359e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_TopHat2D_Constant_Padding/1", + "family_index": 13, + "per_family_instance_index": 0, + "run_name": "Buddy_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 737, + "real_time": 9.5735288135393992e-01, + "cpu_time": 9.5733702578019098e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_BottomHat2D_Constant_Padding/1", + "family_index": 14, + "per_family_instance_index": 0, + "run_name": "Buddy_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 710, + "real_time": 9.2390114047997429e-01, + "cpu_time": 9.2382159295774735e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Erode2D_Constant_Padding/1", + "family_index": 15, + "per_family_instance_index": 0, + "run_name": "OpenCV_Erode2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4222, + "real_time": 1.6582871410926897e-01, + "cpu_time": 1.6582006608242525e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Opening2D_Constant_Padding/1", + "family_index": 16, + "per_family_instance_index": 0, + "run_name": "OpenCV_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2852, + "real_time": 2.4506063847103762e-01, + "cpu_time": 2.4504969950911681e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Closing2D_Constant_Padding/1", + "family_index": 17, + "per_family_instance_index": 0, + "run_name": "OpenCV_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2864, + "real_time": 2.4439445982931712e-01, + "cpu_time": 2.4438653247206729e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_TopHat2D_Constant_Padding/1", + "family_index": 18, + "per_family_instance_index": 0, + "run_name": "OpenCV_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2570, + "real_time": 2.7270443331638661e-01, + "cpu_time": 2.7268578988326853e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_BottomHat2D_Constant_Padding/1", + "family_index": 19, + "per_family_instance_index": 0, + "run_name": "OpenCV_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2597, + "real_time": 2.7151760983384476e-01, + "cpu_time": 2.7150931959953850e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "family_index": 20, + "per_family_instance_index": 0, + "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2439, + "real_time": 2.8630925549401176e-01, + "cpu_time": 2.8630299056990677e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Dilate2D_Constant_Padding/1", + "family_index": 21, + "per_family_instance_index": 0, + "run_name": "OpenCV_Dilate2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4308, + "real_time": 1.6245689889902082e-01, + "cpu_time": 1.6245064832869063e-01, + "time_unit": "ms" + } + ] +} diff --git a/test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log b/test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log new file mode 100644 index 00000000..a4f8253f --- /dev/null +++ b/test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log @@ -0,0 +1,56 @@ +2025-09-07T14:25:25+00:00 +Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 3.91, 3.77, 4.70 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 4.98 ms 4.98 ms 140 +MLIR_Conv2D/1 7.56 ms 7.56 ms 92 +Buddy_Conv2D/1 0.365 ms 0.365 ms 1928 +Buddy_Corr2D_Constant_Padding/1 0.861 ms 0.861 ms 814 +OpenCV_Filter2D_Constant_Padding/1 1.31 ms 1.31 ms 533 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.149 ms 0.149 ms 4720 +Buddy_Resize2D_Bilinear_Interpolation/1 0.274 ms 0.274 ms 2570 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 100923 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47371 +Buddy_Erosion2D_Constant_Padding/1 0.353 ms 0.353 ms 1930 +Buddy_Dilation2D_Constant_Padding/1 0.303 ms 0.303 ms 2161 +Buddy_Opening2D_Constant_Padding/1 0.531 ms 0.531 ms 1325 +Buddy_Closing2D_Constant_Padding/1 0.562 ms 0.562 ms 1097 +Buddy_TopHat2D_Constant_Padding/1 0.957 ms 0.957 ms 737 +Buddy_BottomHat2D_Constant_Padding/1 0.924 ms 0.924 ms 710 +OpenCV_Erode2D_Constant_Padding/1 0.166 ms 0.166 ms 4222 +OpenCV_Opening2D_Constant_Padding/1 0.245 ms 0.245 ms 2852 +OpenCV_Closing2D_Constant_Padding/1 0.244 ms 0.244 ms 2864 +OpenCV_TopHat2D_Constant_Padding/1 0.273 ms 0.273 ms 2570 +OpenCV_BottomHat2D_Constant_Padding/1 0.272 ms 0.272 ms 2597 +OpenCV_MorphGrad2D_Constant_Padding/1 0.286 ms 0.286 ms 2439 +OpenCV_Dilate2D_Constant_Padding/1 0.162 ms 0.162 ms 4308 +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json new file mode 100644 index 00000000..583475bb --- /dev/null +++ b/test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json @@ -0,0 +1,348 @@ +{ + "context": { + "date": "2025-09-07T14:25:50+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./bin/image-processing-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [3.6001,3.70215,4.65381], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "Eigen_Convolve2D/1", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Eigen_Convolve2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 137, + "real_time": 5.0967205723706819e+00, + "cpu_time": 5.0964819489051107e+00, + "time_unit": "ms" + }, + { + "name": "MLIR_Conv2D/1", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "MLIR_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 91, + "real_time": 7.6128631257093868e+00, + "cpu_time": 7.6126163516483496e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Conv2D/1", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "Buddy_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1997, + "real_time": 3.4362500795556356e-01, + "cpu_time": 3.4358477866800197e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Corr2D_Constant_Padding/1", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "Buddy_Corr2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 805, + "real_time": 8.6478873717118498e-01, + "cpu_time": 8.6472154285714264e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Filter2D_Constant_Padding/1", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "OpenCV_Filter2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 529, + "real_time": 1.3110746324963740e+00, + "cpu_time": 1.3110105841209827e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 5, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4716, + "real_time": 1.5283490043762279e-01, + "cpu_time": 1.5283251993214594e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2554, + "real_time": 2.7269076267162523e-01, + "cpu_time": 2.7268682106499587e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 7, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 101059, + "real_time": 6.9261688433581575e-03, + "cpu_time": 6.9260731255998999e-03, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "family_index": 8, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 47776, + "real_time": 1.4638210093503979e-02, + "cpu_time": 1.4638031208137982e-02, + "time_unit": "ms" + }, + { + "name": "Buddy_Erosion2D_Constant_Padding/1", + "family_index": 9, + "per_family_instance_index": 0, + "run_name": "Buddy_Erosion2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1929, + "real_time": 2.9913433178092852e-01, + "cpu_time": 2.9912855054432330e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Dilation2D_Constant_Padding/1", + "family_index": 10, + "per_family_instance_index": 0, + "run_name": "Buddy_Dilation2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2459, + "real_time": 3.0520153711025383e-01, + "cpu_time": 3.0519616063440402e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Opening2D_Constant_Padding/1", + "family_index": 11, + "per_family_instance_index": 0, + "run_name": "Buddy_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1338, + "real_time": 5.4847415096734853e-01, + "cpu_time": 5.4846282810164348e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Closing2D_Constant_Padding/1", + "family_index": 12, + "per_family_instance_index": 0, + "run_name": "Buddy_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1000, + "real_time": 5.5285461992025375e-01, + "cpu_time": 5.5284405199999931e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_TopHat2D_Constant_Padding/1", + "family_index": 13, + "per_family_instance_index": 0, + "run_name": "Buddy_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 716, + "real_time": 9.7495791445064806e-01, + "cpu_time": 9.7494059916201126e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_BottomHat2D_Constant_Padding/1", + "family_index": 14, + "per_family_instance_index": 0, + "run_name": "Buddy_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 692, + "real_time": 9.6457034909311745e-01, + "cpu_time": 9.6455439884393179e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Erode2D_Constant_Padding/1", + "family_index": 15, + "per_family_instance_index": 0, + "run_name": "OpenCV_Erode2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4887, + "real_time": 1.4370574812374892e-01, + "cpu_time": 1.4370325148352775e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Opening2D_Constant_Padding/1", + "family_index": 16, + "per_family_instance_index": 0, + "run_name": "OpenCV_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3079, + "real_time": 2.2928564821059302e-01, + "cpu_time": 2.2928312731406281e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Closing2D_Constant_Padding/1", + "family_index": 17, + "per_family_instance_index": 0, + "run_name": "OpenCV_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3082, + "real_time": 2.2956116836926599e-01, + "cpu_time": 2.2955781213497742e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_TopHat2D_Constant_Padding/1", + "family_index": 18, + "per_family_instance_index": 0, + "run_name": "OpenCV_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2636, + "real_time": 2.6566850951390852e-01, + "cpu_time": 2.6566524696509775e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_BottomHat2D_Constant_Padding/1", + "family_index": 19, + "per_family_instance_index": 0, + "run_name": "OpenCV_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2623, + "real_time": 2.6680013712380524e-01, + "cpu_time": 2.6679694357605821e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "family_index": 20, + "per_family_instance_index": 0, + "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2663, + "real_time": 2.6179852199831988e-01, + "cpu_time": 2.6179651633496065e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Dilate2D_Constant_Padding/1", + "family_index": 21, + "per_family_instance_index": 0, + "run_name": "OpenCV_Dilate2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4965, + "real_time": 1.4100358385333842e-01, + "cpu_time": 1.4100249889224539e-01, + "time_unit": "ms" + } + ] +} diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log b/test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log new file mode 100644 index 00000000..96c6d225 --- /dev/null +++ b/test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log @@ -0,0 +1,56 @@ +2025-09-07T14:25:50+00:00 +Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 3.60, 3.70, 4.65 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 5.10 ms 5.10 ms 137 +MLIR_Conv2D/1 7.61 ms 7.61 ms 91 +Buddy_Conv2D/1 0.344 ms 0.344 ms 1997 +Buddy_Corr2D_Constant_Padding/1 0.865 ms 0.865 ms 805 +OpenCV_Filter2D_Constant_Padding/1 1.31 ms 1.31 ms 529 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.153 ms 0.153 ms 4716 +Buddy_Resize2D_Bilinear_Interpolation/1 0.273 ms 0.273 ms 2554 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 101059 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47776 +Buddy_Erosion2D_Constant_Padding/1 0.299 ms 0.299 ms 1929 +Buddy_Dilation2D_Constant_Padding/1 0.305 ms 0.305 ms 2459 +Buddy_Opening2D_Constant_Padding/1 0.548 ms 0.548 ms 1338 +Buddy_Closing2D_Constant_Padding/1 0.553 ms 0.553 ms 1000 +Buddy_TopHat2D_Constant_Padding/1 0.975 ms 0.975 ms 716 +Buddy_BottomHat2D_Constant_Padding/1 0.965 ms 0.965 ms 692 +OpenCV_Erode2D_Constant_Padding/1 0.144 ms 0.144 ms 4887 +OpenCV_Opening2D_Constant_Padding/1 0.229 ms 0.229 ms 3079 +OpenCV_Closing2D_Constant_Padding/1 0.230 ms 0.230 ms 3082 +OpenCV_TopHat2D_Constant_Padding/1 0.266 ms 0.266 ms 2636 +OpenCV_BottomHat2D_Constant_Padding/1 0.267 ms 0.267 ms 2623 +OpenCV_MorphGrad2D_Constant_Padding/1 0.262 ms 0.262 ms 2663 +OpenCV_Dilate2D_Constant_Padding/1 0.141 ms 0.141 ms 4965 +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json new file mode 100644 index 00000000..b1ffde62 --- /dev/null +++ b/test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json @@ -0,0 +1,348 @@ +{ + "context": { + "date": "2025-09-07T14:26:14+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./bin/image-processing-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [3.39404,3.64453,4.60889], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "Eigen_Convolve2D/1", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Eigen_Convolve2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 121, + "real_time": 5.1075547071527847e+00, + "cpu_time": 5.1073665289256196e+00, + "time_unit": "ms" + }, + { + "name": "MLIR_Conv2D/1", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "MLIR_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 93, + "real_time": 7.6060896358823262e+00, + "cpu_time": 7.6059665161290306e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Conv2D/1", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "Buddy_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2123, + "real_time": 3.2846411506746714e-01, + "cpu_time": 3.2843848233631667e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Corr2D_Constant_Padding/1", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "Buddy_Corr2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 817, + "real_time": 8.6353970019003168e-01, + "cpu_time": 8.6350830844553239e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Filter2D_Constant_Padding/1", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "OpenCV_Filter2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 533, + "real_time": 1.3113399062438484e+00, + "cpu_time": 1.3112924840525322e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 5, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4732, + "real_time": 1.4925754329131402e-01, + "cpu_time": 1.4925238377007605e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2557, + "real_time": 2.7432158665121836e-01, + "cpu_time": 2.7429795150567055e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 7, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 101042, + "real_time": 6.9291528471928992e-03, + "cpu_time": 6.9288272599513055e-03, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "family_index": 8, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 47868, + "real_time": 1.4627723422845006e-02, + "cpu_time": 1.4627092149243754e-02, + "time_unit": "ms" + }, + { + "name": "Buddy_Erosion2D_Constant_Padding/1", + "family_index": 9, + "per_family_instance_index": 0, + "run_name": "Buddy_Erosion2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2430, + "real_time": 2.8533444973666972e-01, + "cpu_time": 2.8531405061728438e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Dilation2D_Constant_Padding/1", + "family_index": 10, + "per_family_instance_index": 0, + "run_name": "Buddy_Dilation2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2094, + "real_time": 3.1473711951357358e-01, + "cpu_time": 3.1470967144221584e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Opening2D_Constant_Padding/1", + "family_index": 11, + "per_family_instance_index": 0, + "run_name": "Buddy_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1014, + "real_time": 5.7866645120892535e-01, + "cpu_time": 5.7864507199211168e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Closing2D_Constant_Padding/1", + "family_index": 12, + "per_family_instance_index": 0, + "run_name": "Buddy_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1111, + "real_time": 5.6143095641389396e-01, + "cpu_time": 5.6142596129612876e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_TopHat2D_Constant_Padding/1", + "family_index": 13, + "per_family_instance_index": 0, + "run_name": "Buddy_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 716, + "real_time": 9.7362239787864946e-01, + "cpu_time": 9.7356560335195652e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_BottomHat2D_Constant_Padding/1", + "family_index": 14, + "per_family_instance_index": 0, + "run_name": "Buddy_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 685, + "real_time": 9.8786628159293288e-01, + "cpu_time": 9.8785897518248145e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Erode2D_Constant_Padding/1", + "family_index": 15, + "per_family_instance_index": 0, + "run_name": "OpenCV_Erode2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4300, + "real_time": 1.4379027624462926e-01, + "cpu_time": 1.4378212465116308e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Opening2D_Constant_Padding/1", + "family_index": 16, + "per_family_instance_index": 0, + "run_name": "OpenCV_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3063, + "real_time": 2.2853155405428766e-01, + "cpu_time": 2.2852091609533151e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Closing2D_Constant_Padding/1", + "family_index": 17, + "per_family_instance_index": 0, + "run_name": "OpenCV_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3071, + "real_time": 2.2941528513508408e-01, + "cpu_time": 2.2940828427222390e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_TopHat2D_Constant_Padding/1", + "family_index": 18, + "per_family_instance_index": 0, + "run_name": "OpenCV_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2523, + "real_time": 2.6365732489906035e-01, + "cpu_time": 2.6365356282203672e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_BottomHat2D_Constant_Padding/1", + "family_index": 19, + "per_family_instance_index": 0, + "run_name": "OpenCV_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2656, + "real_time": 2.6352780433483869e-01, + "cpu_time": 2.6352464533132480e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "family_index": 20, + "per_family_instance_index": 0, + "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2657, + "real_time": 2.6354999174558447e-01, + "cpu_time": 2.6354654459917209e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Dilate2D_Constant_Padding/1", + "family_index": 21, + "per_family_instance_index": 0, + "run_name": "OpenCV_Dilate2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4948, + "real_time": 1.4165986747922873e-01, + "cpu_time": 1.4165828880355658e-01, + "time_unit": "ms" + } + ] +} diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log b/test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log new file mode 100644 index 00000000..6a502cdc --- /dev/null +++ b/test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log @@ -0,0 +1,56 @@ +2025-09-07T14:26:14+00:00 +Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 3.39, 3.64, 4.61 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 5.11 ms 5.11 ms 121 +MLIR_Conv2D/1 7.61 ms 7.61 ms 93 +Buddy_Conv2D/1 0.328 ms 0.328 ms 2123 +Buddy_Corr2D_Constant_Padding/1 0.864 ms 0.864 ms 817 +OpenCV_Filter2D_Constant_Padding/1 1.31 ms 1.31 ms 533 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.149 ms 0.149 ms 4732 +Buddy_Resize2D_Bilinear_Interpolation/1 0.274 ms 0.274 ms 2557 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 101042 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47868 +Buddy_Erosion2D_Constant_Padding/1 0.285 ms 0.285 ms 2430 +Buddy_Dilation2D_Constant_Padding/1 0.315 ms 0.315 ms 2094 +Buddy_Opening2D_Constant_Padding/1 0.579 ms 0.579 ms 1014 +Buddy_Closing2D_Constant_Padding/1 0.561 ms 0.561 ms 1111 +Buddy_TopHat2D_Constant_Padding/1 0.974 ms 0.974 ms 716 +Buddy_BottomHat2D_Constant_Padding/1 0.988 ms 0.988 ms 685 +OpenCV_Erode2D_Constant_Padding/1 0.144 ms 0.144 ms 4300 +OpenCV_Opening2D_Constant_Padding/1 0.229 ms 0.229 ms 3063 +OpenCV_Closing2D_Constant_Padding/1 0.229 ms 0.229 ms 3071 +OpenCV_TopHat2D_Constant_Padding/1 0.264 ms 0.264 ms 2523 +OpenCV_BottomHat2D_Constant_Padding/1 0.264 ms 0.264 ms 2656 +OpenCV_MorphGrad2D_Constant_Padding/1 0.264 ms 0.264 ms 2657 +OpenCV_Dilate2D_Constant_Padding/1 0.142 ms 0.142 ms 4948 +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json new file mode 100644 index 00000000..7cbd4e18 --- /dev/null +++ b/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json @@ -0,0 +1,348 @@ +{ + "context": { + "date": "2025-09-07T14:26:37+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./bin/image-processing-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [3.28125,3.60205,4.57373], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "Eigen_Convolve2D/1", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Eigen_Convolve2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 56, + "real_time": 1.2067083735018969e+01, + "cpu_time": 1.2066812107142857e+01, + "time_unit": "ms" + }, + { + "name": "MLIR_Conv2D/1", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "MLIR_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 23, + "real_time": 3.0612203414025515e+01, + "cpu_time": 3.0611188304347817e+01, + "time_unit": "ms" + }, + { + "name": "Buddy_Conv2D/1", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "Buddy_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 568, + "real_time": 1.3034208284192521e+00, + "cpu_time": 1.3033825299295774e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Corr2D_Constant_Padding/1", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "Buddy_Corr2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 282, + "real_time": 2.4699879717742297e+00, + "cpu_time": 2.4699094858156023e+00, + "time_unit": "ms" + }, + { + "name": "OpenCV_Filter2D_Constant_Padding/1", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "OpenCV_Filter2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 162, + "real_time": 4.3287115562477227e+00, + "cpu_time": 4.3285012407407431e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 5, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4700, + "real_time": 1.4935964916614777e-01, + "cpu_time": 1.4935194957446807e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2552, + "real_time": 2.7521437495292916e-01, + "cpu_time": 2.7520046473354215e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 7, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 100337, + "real_time": 6.9741815465525284e-03, + "cpu_time": 6.9738547993262777e-03, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "family_index": 8, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 46773, + "real_time": 1.4982721659303913e-02, + "cpu_time": 1.4982278921600070e-02, + "time_unit": "ms" + }, + { + "name": "Buddy_Erosion2D_Constant_Padding/1", + "family_index": 9, + "per_family_instance_index": 0, + "run_name": "Buddy_Erosion2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2050, + "real_time": 3.1215877976359391e-01, + "cpu_time": 3.1211740390243931e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Dilation2D_Constant_Padding/1", + "family_index": 10, + "per_family_instance_index": 0, + "run_name": "Buddy_Dilation2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2432, + "real_time": 2.9099147190879049e-01, + "cpu_time": 2.9097846422697360e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Opening2D_Constant_Padding/1", + "family_index": 11, + "per_family_instance_index": 0, + "run_name": "Buddy_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1079, + "real_time": 5.8892201131643029e-01, + "cpu_time": 5.8888681742354165e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Closing2D_Constant_Padding/1", + "family_index": 12, + "per_family_instance_index": 0, + "run_name": "Buddy_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1200, + "real_time": 5.7249554432928562e-01, + "cpu_time": 5.7247323416666751e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_TopHat2D_Constant_Padding/1", + "family_index": 13, + "per_family_instance_index": 0, + "run_name": "Buddy_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 693, + "real_time": 9.6331589450739852e-01, + "cpu_time": 9.6324691630591597e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_BottomHat2D_Constant_Padding/1", + "family_index": 14, + "per_family_instance_index": 0, + "run_name": "Buddy_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 720, + "real_time": 9.4459417483044994e-01, + "cpu_time": 9.4454029166666720e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Erode2D_Constant_Padding/1", + "family_index": 15, + "per_family_instance_index": 0, + "run_name": "OpenCV_Erode2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4811, + "real_time": 1.4385227303672446e-01, + "cpu_time": 1.4383307960922870e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Opening2D_Constant_Padding/1", + "family_index": 16, + "per_family_instance_index": 0, + "run_name": "OpenCV_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3023, + "real_time": 2.3205718441471690e-01, + "cpu_time": 2.3204768309626247e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Closing2D_Constant_Padding/1", + "family_index": 17, + "per_family_instance_index": 0, + "run_name": "OpenCV_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3090, + "real_time": 2.2930755222692459e-01, + "cpu_time": 2.2929591650485445e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_TopHat2D_Constant_Padding/1", + "family_index": 18, + "per_family_instance_index": 0, + "run_name": "OpenCV_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2569, + "real_time": 2.6998296670061739e-01, + "cpu_time": 2.6997637757882353e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_BottomHat2D_Constant_Padding/1", + "family_index": 19, + "per_family_instance_index": 0, + "run_name": "OpenCV_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2579, + "real_time": 2.6765619283639541e-01, + "cpu_time": 2.6764102830554409e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "family_index": 20, + "per_family_instance_index": 0, + "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2657, + "real_time": 2.6501084954432630e-01, + "cpu_time": 2.6500044373353410e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Dilate2D_Constant_Padding/1", + "family_index": 21, + "per_family_instance_index": 0, + "run_name": "OpenCV_Dilate2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4921, + "real_time": 1.4214790953269119e-01, + "cpu_time": 1.4214541942694606e-01, + "time_unit": "ms" + } + ] +} diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log b/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log new file mode 100644 index 00000000..381be4b4 --- /dev/null +++ b/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log @@ -0,0 +1,56 @@ +2025-09-07T14:26:37+00:00 +Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 3.28, 3.60, 4.57 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 12.1 ms 12.1 ms 56 +MLIR_Conv2D/1 30.6 ms 30.6 ms 23 +Buddy_Conv2D/1 1.30 ms 1.30 ms 568 +Buddy_Corr2D_Constant_Padding/1 2.47 ms 2.47 ms 282 +OpenCV_Filter2D_Constant_Padding/1 4.33 ms 4.33 ms 162 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.149 ms 0.149 ms 4700 +Buddy_Resize2D_Bilinear_Interpolation/1 0.275 ms 0.275 ms 2552 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 100337 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 46773 +Buddy_Erosion2D_Constant_Padding/1 0.312 ms 0.312 ms 2050 +Buddy_Dilation2D_Constant_Padding/1 0.291 ms 0.291 ms 2432 +Buddy_Opening2D_Constant_Padding/1 0.589 ms 0.589 ms 1079 +Buddy_Closing2D_Constant_Padding/1 0.572 ms 0.572 ms 1200 +Buddy_TopHat2D_Constant_Padding/1 0.963 ms 0.963 ms 693 +Buddy_BottomHat2D_Constant_Padding/1 0.945 ms 0.945 ms 720 +OpenCV_Erode2D_Constant_Padding/1 0.144 ms 0.144 ms 4811 +OpenCV_Opening2D_Constant_Padding/1 0.232 ms 0.232 ms 3023 +OpenCV_Closing2D_Constant_Padding/1 0.229 ms 0.229 ms 3090 +OpenCV_TopHat2D_Constant_Padding/1 0.270 ms 0.270 ms 2569 +OpenCV_BottomHat2D_Constant_Padding/1 0.268 ms 0.268 ms 2579 +OpenCV_MorphGrad2D_Constant_Padding/1 0.265 ms 0.265 ms 2657 +OpenCV_Dilate2D_Constant_Padding/1 0.142 ms 0.142 ms 4921 +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json new file mode 100644 index 00000000..cd4dab75 --- /dev/null +++ b/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json @@ -0,0 +1,348 @@ +{ + "context": { + "date": "2025-09-07T14:27:01+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./bin/image-processing-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [3.31982,3.58496,4.54102], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "Eigen_Convolve2D/1", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Eigen_Convolve2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 59, + "real_time": 1.1948739699387955e+01, + "cpu_time": 1.1948392644067797e+01, + "time_unit": "ms" + }, + { + "name": "MLIR_Conv2D/1", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "MLIR_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 23, + "real_time": 3.0476028666548107e+01, + "cpu_time": 3.0474844999999998e+01, + "time_unit": "ms" + }, + { + "name": "Buddy_Conv2D/1", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "Buddy_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 562, + "real_time": 1.2884445793369912e+00, + "cpu_time": 1.2884208736654807e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Corr2D_Constant_Padding/1", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "Buddy_Corr2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 284, + "real_time": 2.4603739590711995e+00, + "cpu_time": 2.4602806619718312e+00, + "time_unit": "ms" + }, + { + "name": "OpenCV_Filter2D_Constant_Padding/1", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "OpenCV_Filter2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 163, + "real_time": 4.2872666581276739e+00, + "cpu_time": 4.2871032392638035e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 5, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4715, + "real_time": 1.4838221327876741e-01, + "cpu_time": 1.4837547020148453e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2576, + "real_time": 2.7262466462178631e-01, + "cpu_time": 2.7260505046583849e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 7, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 101005, + "real_time": 6.9283273903298307e-03, + "cpu_time": 6.9277283500816817e-03, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "family_index": 8, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 47760, + "real_time": 1.4637780439738853e-02, + "cpu_time": 1.4637589635678379e-02, + "time_unit": "ms" + }, + { + "name": "Buddy_Erosion2D_Constant_Padding/1", + "family_index": 9, + "per_family_instance_index": 0, + "run_name": "Buddy_Erosion2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2247, + "real_time": 2.7769743156104176e-01, + "cpu_time": 2.7768090075656437e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Dilation2D_Constant_Padding/1", + "family_index": 10, + "per_family_instance_index": 0, + "run_name": "Buddy_Dilation2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2603, + "real_time": 3.3085079827173097e-01, + "cpu_time": 3.3083193046484832e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Opening2D_Constant_Padding/1", + "family_index": 11, + "per_family_instance_index": 0, + "run_name": "Buddy_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1077, + "real_time": 5.8244004112321579e-01, + "cpu_time": 5.8239145868152165e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Closing2D_Constant_Padding/1", + "family_index": 12, + "per_family_instance_index": 0, + "run_name": "Buddy_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1171, + "real_time": 5.6926543123695605e-01, + "cpu_time": 5.6925410674637089e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_TopHat2D_Constant_Padding/1", + "family_index": 13, + "per_family_instance_index": 0, + "run_name": "Buddy_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 737, + "real_time": 9.5344758927417739e-01, + "cpu_time": 9.5340984531885853e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_BottomHat2D_Constant_Padding/1", + "family_index": 14, + "per_family_instance_index": 0, + "run_name": "Buddy_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 722, + "real_time": 9.4657656001417256e-01, + "cpu_time": 9.4650936842105216e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Erode2D_Constant_Padding/1", + "family_index": 15, + "per_family_instance_index": 0, + "run_name": "OpenCV_Erode2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4886, + "real_time": 1.4358902494169579e-01, + "cpu_time": 1.4358145968072070e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Opening2D_Constant_Padding/1", + "family_index": 16, + "per_family_instance_index": 0, + "run_name": "OpenCV_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2978, + "real_time": 2.3539010072571068e-01, + "cpu_time": 2.3538351511081329e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Closing2D_Constant_Padding/1", + "family_index": 17, + "per_family_instance_index": 0, + "run_name": "OpenCV_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2979, + "real_time": 2.3258523218341223e-01, + "cpu_time": 2.3257072171869730e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_TopHat2D_Constant_Padding/1", + "family_index": 18, + "per_family_instance_index": 0, + "run_name": "OpenCV_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2603, + "real_time": 2.6823798372706131e-01, + "cpu_time": 2.6822945024971095e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_BottomHat2D_Constant_Padding/1", + "family_index": 19, + "per_family_instance_index": 0, + "run_name": "OpenCV_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2574, + "real_time": 2.7231369876731898e-01, + "cpu_time": 2.7229450038850062e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "family_index": 20, + "per_family_instance_index": 0, + "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2619, + "real_time": 2.6665248173072437e-01, + "cpu_time": 2.6664444520809394e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Dilate2D_Constant_Padding/1", + "family_index": 21, + "per_family_instance_index": 0, + "run_name": "OpenCV_Dilate2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4960, + "real_time": 1.4122858448254486e-01, + "cpu_time": 1.4122089999999995e-01, + "time_unit": "ms" + } + ] +} diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log b/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log new file mode 100644 index 00000000..6504a841 --- /dev/null +++ b/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log @@ -0,0 +1,56 @@ +2025-09-07T14:27:01+00:00 +Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 3.32, 3.58, 4.54 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 11.9 ms 11.9 ms 59 +MLIR_Conv2D/1 30.5 ms 30.5 ms 23 +Buddy_Conv2D/1 1.29 ms 1.29 ms 562 +Buddy_Corr2D_Constant_Padding/1 2.46 ms 2.46 ms 284 +OpenCV_Filter2D_Constant_Padding/1 4.29 ms 4.29 ms 163 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.148 ms 0.148 ms 4715 +Buddy_Resize2D_Bilinear_Interpolation/1 0.273 ms 0.273 ms 2576 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 101005 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47760 +Buddy_Erosion2D_Constant_Padding/1 0.278 ms 0.278 ms 2247 +Buddy_Dilation2D_Constant_Padding/1 0.331 ms 0.331 ms 2603 +Buddy_Opening2D_Constant_Padding/1 0.582 ms 0.582 ms 1077 +Buddy_Closing2D_Constant_Padding/1 0.569 ms 0.569 ms 1171 +Buddy_TopHat2D_Constant_Padding/1 0.953 ms 0.953 ms 737 +Buddy_BottomHat2D_Constant_Padding/1 0.947 ms 0.947 ms 722 +OpenCV_Erode2D_Constant_Padding/1 0.144 ms 0.144 ms 4886 +OpenCV_Opening2D_Constant_Padding/1 0.235 ms 0.235 ms 2978 +OpenCV_Closing2D_Constant_Padding/1 0.233 ms 0.233 ms 2979 +OpenCV_TopHat2D_Constant_Padding/1 0.268 ms 0.268 ms 2603 +OpenCV_BottomHat2D_Constant_Padding/1 0.272 ms 0.272 ms 2574 +OpenCV_MorphGrad2D_Constant_Padding/1 0.267 ms 0.267 ms 2619 +OpenCV_Dilate2D_Constant_Padding/1 0.141 ms 0.141 ms 4960 +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json new file mode 100644 index 00000000..9cd4797c --- /dev/null +++ b/test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json @@ -0,0 +1,348 @@ +{ + "context": { + "date": "2025-09-07T14:27:26+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./bin/image-processing-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [3.20947,3.53711,4.49951], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "Eigen_Convolve2D/1", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Eigen_Convolve2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 31, + "real_time": 2.2631176176571078e+01, + "cpu_time": 2.2627850516129033e+01, + "time_unit": "ms" + }, + { + "name": "MLIR_Conv2D/1", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "MLIR_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 10, + "real_time": 7.0389824360609055e+01, + "cpu_time": 7.0386717899999994e+01, + "time_unit": "ms" + }, + { + "name": "Buddy_Conv2D/1", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "Buddy_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 291, + "real_time": 2.3570958640157560e+00, + "cpu_time": 2.3570359209621992e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Corr2D_Constant_Padding/1", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "Buddy_Corr2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 145, + "real_time": 4.8382232415265047e+00, + "cpu_time": 4.8375873103448299e+00, + "time_unit": "ms" + }, + { + "name": "OpenCV_Filter2D_Constant_Padding/1", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "OpenCV_Filter2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 78, + "real_time": 8.9898192538664894e+00, + "cpu_time": 8.9885258333333269e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 5, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4732, + "real_time": 1.4900763708836731e-01, + "cpu_time": 1.4898793300929827e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2567, + "real_time": 2.7684785533053519e-01, + "cpu_time": 2.7677605103233366e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 7, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 100753, + "real_time": 7.1818906740909278e-03, + "cpu_time": 7.1817220132403070e-03, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "family_index": 8, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 47612, + "real_time": 1.4661686260374937e-02, + "cpu_time": 1.4661306267327555e-02, + "time_unit": "ms" + }, + { + "name": "Buddy_Erosion2D_Constant_Padding/1", + "family_index": 9, + "per_family_instance_index": 0, + "run_name": "Buddy_Erosion2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2204, + "real_time": 3.2491934825471869e-01, + "cpu_time": 3.2490066288566188e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Dilation2D_Constant_Padding/1", + "family_index": 10, + "per_family_instance_index": 0, + "run_name": "Buddy_Dilation2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2109, + "real_time": 3.3516474085828601e-01, + "cpu_time": 3.3511585395922289e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Opening2D_Constant_Padding/1", + "family_index": 11, + "per_family_instance_index": 0, + "run_name": "Buddy_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1000, + "real_time": 6.0891722515225410e-01, + "cpu_time": 6.0877587399999911e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Closing2D_Constant_Padding/1", + "family_index": 12, + "per_family_instance_index": 0, + "run_name": "Buddy_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1056, + "real_time": 5.6265958471957478e-01, + "cpu_time": 5.6263823579545469e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_TopHat2D_Constant_Padding/1", + "family_index": 13, + "per_family_instance_index": 0, + "run_name": "Buddy_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 665, + "real_time": 1.0144153091692387e+00, + "cpu_time": 1.0142662390977437e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_BottomHat2D_Constant_Padding/1", + "family_index": 14, + "per_family_instance_index": 0, + "run_name": "Buddy_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 687, + "real_time": 9.8415326972368677e-01, + "cpu_time": 9.8413417176128126e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Erode2D_Constant_Padding/1", + "family_index": 15, + "per_family_instance_index": 0, + "run_name": "OpenCV_Erode2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4827, + "real_time": 1.4460108045936990e-01, + "cpu_time": 1.4459874000414299e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Opening2D_Constant_Padding/1", + "family_index": 16, + "per_family_instance_index": 0, + "run_name": "OpenCV_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3090, + "real_time": 2.2893413467314636e-01, + "cpu_time": 2.2890716181229737e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Closing2D_Constant_Padding/1", + "family_index": 17, + "per_family_instance_index": 0, + "run_name": "OpenCV_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3038, + "real_time": 2.3062362374248593e-01, + "cpu_time": 2.3059413199473325e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_TopHat2D_Constant_Padding/1", + "family_index": 18, + "per_family_instance_index": 0, + "run_name": "OpenCV_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2627, + "real_time": 2.6741435983174650e-01, + "cpu_time": 2.6737748953178492e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_BottomHat2D_Constant_Padding/1", + "family_index": 19, + "per_family_instance_index": 0, + "run_name": "OpenCV_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2614, + "real_time": 2.6675470190460482e-01, + "cpu_time": 2.6671898355011386e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "family_index": 20, + "per_family_instance_index": 0, + "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2680, + "real_time": 2.6224578188648867e-01, + "cpu_time": 2.6221102089552206e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Dilate2D_Constant_Padding/1", + "family_index": 21, + "per_family_instance_index": 0, + "run_name": "OpenCV_Dilate2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4958, + "real_time": 1.4103486151188407e-01, + "cpu_time": 1.4101694735780548e-01, + "time_unit": "ms" + } + ] +} diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log b/test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log new file mode 100644 index 00000000..e2188404 --- /dev/null +++ b/test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log @@ -0,0 +1,56 @@ +2025-09-07T14:27:26+00:00 +Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 3.21, 3.54, 4.50 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 22.6 ms 22.6 ms 31 +MLIR_Conv2D/1 70.4 ms 70.4 ms 10 +Buddy_Conv2D/1 2.36 ms 2.36 ms 291 +Buddy_Corr2D_Constant_Padding/1 4.84 ms 4.84 ms 145 +OpenCV_Filter2D_Constant_Padding/1 8.99 ms 8.99 ms 78 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.149 ms 0.149 ms 4732 +Buddy_Resize2D_Bilinear_Interpolation/1 0.277 ms 0.277 ms 2567 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 100753 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47612 +Buddy_Erosion2D_Constant_Padding/1 0.325 ms 0.325 ms 2204 +Buddy_Dilation2D_Constant_Padding/1 0.335 ms 0.335 ms 2109 +Buddy_Opening2D_Constant_Padding/1 0.609 ms 0.609 ms 1000 +Buddy_Closing2D_Constant_Padding/1 0.563 ms 0.563 ms 1056 +Buddy_TopHat2D_Constant_Padding/1 1.01 ms 1.01 ms 665 +Buddy_BottomHat2D_Constant_Padding/1 0.984 ms 0.984 ms 687 +OpenCV_Erode2D_Constant_Padding/1 0.145 ms 0.145 ms 4827 +OpenCV_Opening2D_Constant_Padding/1 0.229 ms 0.229 ms 3090 +OpenCV_Closing2D_Constant_Padding/1 0.231 ms 0.231 ms 3038 +OpenCV_TopHat2D_Constant_Padding/1 0.267 ms 0.267 ms 2627 +OpenCV_BottomHat2D_Constant_Padding/1 0.267 ms 0.267 ms 2614 +OpenCV_MorphGrad2D_Constant_Padding/1 0.262 ms 0.262 ms 2680 +OpenCV_Dilate2D_Constant_Padding/1 0.141 ms 0.141 ms 4958 +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json new file mode 100644 index 00000000..1dbbecce --- /dev/null +++ b/test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json @@ -0,0 +1,348 @@ +{ + "context": { + "date": "2025-09-07T14:27:51+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./bin/image-processing-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [3.13721,3.49268,4.45801], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "Eigen_Convolve2D/1", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Eigen_Convolve2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 31, + "real_time": 2.2485823640900275e+01, + "cpu_time": 2.2485474354838711e+01, + "time_unit": "ms" + }, + { + "name": "MLIR_Conv2D/1", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "MLIR_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 10, + "real_time": 7.0325020700693130e+01, + "cpu_time": 7.0323628100000008e+01, + "time_unit": "ms" + }, + { + "name": "Buddy_Conv2D/1", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "Buddy_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 304, + "real_time": 2.3227319930140911e+00, + "cpu_time": 2.3226553092105280e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Corr2D_Constant_Padding/1", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "Buddy_Corr2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 144, + "real_time": 4.8592611629929809e+00, + "cpu_time": 4.8591047083333336e+00, + "time_unit": "ms" + }, + { + "name": "OpenCV_Filter2D_Constant_Padding/1", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "OpenCV_Filter2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 78, + "real_time": 8.9812785004958133e+00, + "cpu_time": 8.9809005512820512e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 5, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4740, + "real_time": 1.4794336765501570e-01, + "cpu_time": 1.4793731962025314e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2579, + "real_time": 2.7309454177275516e-01, + "cpu_time": 2.7308133927879030e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 7, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 101192, + "real_time": 6.9005510118396131e-03, + "cpu_time": 6.9002890544707105e-03, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "family_index": 8, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 47570, + "real_time": 1.4686870232258922e-02, + "cpu_time": 1.4686363212108497e-02, + "time_unit": "ms" + }, + { + "name": "Buddy_Erosion2D_Constant_Padding/1", + "family_index": 9, + "per_family_instance_index": 0, + "run_name": "Buddy_Erosion2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2503, + "real_time": 3.0775897315012374e-01, + "cpu_time": 3.0774776468238130e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Dilation2D_Constant_Padding/1", + "family_index": 10, + "per_family_instance_index": 0, + "run_name": "Buddy_Dilation2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2313, + "real_time": 2.9183508988535872e-01, + "cpu_time": 2.9181160397751793e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Opening2D_Constant_Padding/1", + "family_index": 11, + "per_family_instance_index": 0, + "run_name": "Buddy_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1118, + "real_time": 5.0250999962707754e-01, + "cpu_time": 5.0250005456171720e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Closing2D_Constant_Padding/1", + "family_index": 12, + "per_family_instance_index": 0, + "run_name": "Buddy_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1097, + "real_time": 4.7140384098020810e-01, + "cpu_time": 4.7139598450319076e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_TopHat2D_Constant_Padding/1", + "family_index": 13, + "per_family_instance_index": 0, + "run_name": "Buddy_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 739, + "real_time": 9.4388246395107211e-01, + "cpu_time": 9.4387578755074519e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_BottomHat2D_Constant_Padding/1", + "family_index": 14, + "per_family_instance_index": 0, + "run_name": "Buddy_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 700, + "real_time": 9.3932237476110458e-01, + "cpu_time": 9.3930383428571462e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Erode2D_Constant_Padding/1", + "family_index": 15, + "per_family_instance_index": 0, + "run_name": "OpenCV_Erode2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4835, + "real_time": 1.4465527980749215e-01, + "cpu_time": 1.4465337456049635e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Opening2D_Constant_Padding/1", + "family_index": 16, + "per_family_instance_index": 0, + "run_name": "OpenCV_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3096, + "real_time": 2.3360328763261323e-01, + "cpu_time": 2.3359945413436684e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Closing2D_Constant_Padding/1", + "family_index": 17, + "per_family_instance_index": 0, + "run_name": "OpenCV_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3103, + "real_time": 2.2638490458976213e-01, + "cpu_time": 2.2637974830808869e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_TopHat2D_Constant_Padding/1", + "family_index": 18, + "per_family_instance_index": 0, + "run_name": "OpenCV_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2686, + "real_time": 2.6290406343303147e-01, + "cpu_time": 2.6289967795979169e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_BottomHat2D_Constant_Padding/1", + "family_index": 19, + "per_family_instance_index": 0, + "run_name": "OpenCV_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2642, + "real_time": 2.6402394037008464e-01, + "cpu_time": 2.6402097728993146e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "family_index": 20, + "per_family_instance_index": 0, + "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2681, + "real_time": 2.6178075412634993e-01, + "cpu_time": 2.6177909585975329e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Dilate2D_Constant_Padding/1", + "family_index": 21, + "per_family_instance_index": 0, + "run_name": "OpenCV_Dilate2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4948, + "real_time": 1.4226491018684043e-01, + "cpu_time": 1.4226395371867465e-01, + "time_unit": "ms" + } + ] +} diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log b/test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log new file mode 100644 index 00000000..50b485e5 --- /dev/null +++ b/test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log @@ -0,0 +1,56 @@ +2025-09-07T14:27:51+00:00 +Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 3.14, 3.49, 4.46 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 22.5 ms 22.5 ms 31 +MLIR_Conv2D/1 70.3 ms 70.3 ms 10 +Buddy_Conv2D/1 2.32 ms 2.32 ms 304 +Buddy_Corr2D_Constant_Padding/1 4.86 ms 4.86 ms 144 +OpenCV_Filter2D_Constant_Padding/1 8.98 ms 8.98 ms 78 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.148 ms 0.148 ms 4740 +Buddy_Resize2D_Bilinear_Interpolation/1 0.273 ms 0.273 ms 2579 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 101192 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47570 +Buddy_Erosion2D_Constant_Padding/1 0.308 ms 0.308 ms 2503 +Buddy_Dilation2D_Constant_Padding/1 0.292 ms 0.292 ms 2313 +Buddy_Opening2D_Constant_Padding/1 0.503 ms 0.503 ms 1118 +Buddy_Closing2D_Constant_Padding/1 0.471 ms 0.471 ms 1097 +Buddy_TopHat2D_Constant_Padding/1 0.944 ms 0.944 ms 739 +Buddy_BottomHat2D_Constant_Padding/1 0.939 ms 0.939 ms 700 +OpenCV_Erode2D_Constant_Padding/1 0.145 ms 0.145 ms 4835 +OpenCV_Opening2D_Constant_Padding/1 0.234 ms 0.234 ms 3096 +OpenCV_Closing2D_Constant_Padding/1 0.226 ms 0.226 ms 3103 +OpenCV_TopHat2D_Constant_Padding/1 0.263 ms 0.263 ms 2686 +OpenCV_BottomHat2D_Constant_Padding/1 0.264 ms 0.264 ms 2642 +OpenCV_MorphGrad2D_Constant_Padding/1 0.262 ms 0.262 ms 2681 +OpenCV_Dilate2D_Constant_Padding/1 0.142 ms 0.142 ms 4948 +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json new file mode 100644 index 00000000..7142618a --- /dev/null +++ b/test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json @@ -0,0 +1,348 @@ +{ + "context": { + "date": "2025-09-07T14:28:14+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./bin/image-processing-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [3.08936,3.45215,4.41846], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "Eigen_Convolve2D/1", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Eigen_Convolve2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 19, + "real_time": 3.6411155799501820e+01, + "cpu_time": 3.6410112789473686e+01, + "time_unit": "ms" + }, + { + "name": "MLIR_Conv2D/1", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "MLIR_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 6, + "real_time": 1.2272199243307114e+02, + "cpu_time": 1.2271996516666665e+02, + "time_unit": "ms" + }, + { + "name": "Buddy_Conv2D/1", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "Buddy_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 164, + "real_time": 4.2625103463850369e+00, + "cpu_time": 4.2624786829268295e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Corr2D_Constant_Padding/1", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "Buddy_Corr2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 87, + "real_time": 8.0896045627265138e+00, + "cpu_time": 8.0895408160919526e+00, + "time_unit": "ms" + }, + { + "name": "OpenCV_Filter2D_Constant_Padding/1", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "OpenCV_Filter2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 116, + "real_time": 6.0323840328331650e+00, + "cpu_time": 6.0323151551724177e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 5, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4740, + "real_time": 1.4794642962358168e-01, + "cpu_time": 1.4794533839662441e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2578, + "real_time": 2.7431065723866432e-01, + "cpu_time": 2.7430675096974405e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 7, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 101183, + "real_time": 6.9150012404555119e-03, + "cpu_time": 6.9149498927685484e-03, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "family_index": 8, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 47997, + "real_time": 1.4597131495282340e-02, + "cpu_time": 1.4596917932370775e-02, + "time_unit": "ms" + }, + { + "name": "Buddy_Erosion2D_Constant_Padding/1", + "family_index": 9, + "per_family_instance_index": 0, + "run_name": "Buddy_Erosion2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2553, + "real_time": 2.8082992789608052e-01, + "cpu_time": 2.8082769095182175e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Dilation2D_Constant_Padding/1", + "family_index": 10, + "per_family_instance_index": 0, + "run_name": "Buddy_Dilation2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2317, + "real_time": 2.6909916641180198e-01, + "cpu_time": 2.6909711523521840e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Opening2D_Constant_Padding/1", + "family_index": 11, + "per_family_instance_index": 0, + "run_name": "Buddy_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1198, + "real_time": 4.8579790867330236e-01, + "cpu_time": 4.8579480467445768e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Closing2D_Constant_Padding/1", + "family_index": 12, + "per_family_instance_index": 0, + "run_name": "Buddy_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1256, + "real_time": 4.3843196005008783e-01, + "cpu_time": 4.3842444347133841e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_TopHat2D_Constant_Padding/1", + "family_index": 13, + "per_family_instance_index": 0, + "run_name": "Buddy_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 721, + "real_time": 9.3752561180998317e-01, + "cpu_time": 9.3750926213592223e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_BottomHat2D_Constant_Padding/1", + "family_index": 14, + "per_family_instance_index": 0, + "run_name": "Buddy_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 740, + "real_time": 9.3545040770156962e-01, + "cpu_time": 9.3543432297297435e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Erode2D_Constant_Padding/1", + "family_index": 15, + "per_family_instance_index": 0, + "run_name": "OpenCV_Erode2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4947, + "real_time": 1.4094690816171584e-01, + "cpu_time": 1.4094593612290271e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Opening2D_Constant_Padding/1", + "family_index": 16, + "per_family_instance_index": 0, + "run_name": "OpenCV_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3152, + "real_time": 2.2193788063858971e-01, + "cpu_time": 2.2193630583756352e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Closing2D_Constant_Padding/1", + "family_index": 17, + "per_family_instance_index": 0, + "run_name": "OpenCV_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3129, + "real_time": 2.2150588496571799e-01, + "cpu_time": 2.2150361585171055e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_TopHat2D_Constant_Padding/1", + "family_index": 18, + "per_family_instance_index": 0, + "run_name": "OpenCV_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2742, + "real_time": 2.5509349882254889e-01, + "cpu_time": 2.5509168016046663e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_BottomHat2D_Constant_Padding/1", + "family_index": 19, + "per_family_instance_index": 0, + "run_name": "OpenCV_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2746, + "real_time": 2.5341272739407272e-01, + "cpu_time": 2.5341093226511219e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "family_index": 20, + "per_family_instance_index": 0, + "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2689, + "real_time": 2.6112427472979571e-01, + "cpu_time": 2.6112239866121267e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Dilate2D_Constant_Padding/1", + "family_index": 21, + "per_family_instance_index": 0, + "run_name": "OpenCV_Dilate2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4988, + "real_time": 1.4059869355933610e-01, + "cpu_time": 1.4059724599037698e-01, + "time_unit": "ms" + } + ] +} diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log b/test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log new file mode 100644 index 00000000..268cf92d --- /dev/null +++ b/test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log @@ -0,0 +1,56 @@ +2025-09-07T14:28:14+00:00 +Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 3.09, 3.45, 4.42 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 36.4 ms 36.4 ms 19 +MLIR_Conv2D/1 123 ms 123 ms 6 +Buddy_Conv2D/1 4.26 ms 4.26 ms 164 +Buddy_Corr2D_Constant_Padding/1 8.09 ms 8.09 ms 87 +OpenCV_Filter2D_Constant_Padding/1 6.03 ms 6.03 ms 116 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.148 ms 0.148 ms 4740 +Buddy_Resize2D_Bilinear_Interpolation/1 0.274 ms 0.274 ms 2578 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 101183 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47997 +Buddy_Erosion2D_Constant_Padding/1 0.281 ms 0.281 ms 2553 +Buddy_Dilation2D_Constant_Padding/1 0.269 ms 0.269 ms 2317 +Buddy_Opening2D_Constant_Padding/1 0.486 ms 0.486 ms 1198 +Buddy_Closing2D_Constant_Padding/1 0.438 ms 0.438 ms 1256 +Buddy_TopHat2D_Constant_Padding/1 0.938 ms 0.938 ms 721 +Buddy_BottomHat2D_Constant_Padding/1 0.935 ms 0.935 ms 740 +OpenCV_Erode2D_Constant_Padding/1 0.141 ms 0.141 ms 4947 +OpenCV_Opening2D_Constant_Padding/1 0.222 ms 0.222 ms 3152 +OpenCV_Closing2D_Constant_Padding/1 0.222 ms 0.222 ms 3129 +OpenCV_TopHat2D_Constant_Padding/1 0.255 ms 0.255 ms 2742 +OpenCV_BottomHat2D_Constant_Padding/1 0.253 ms 0.253 ms 2746 +OpenCV_MorphGrad2D_Constant_Padding/1 0.261 ms 0.261 ms 2689 +OpenCV_Dilate2D_Constant_Padding/1 0.141 ms 0.141 ms 4988 +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json new file mode 100644 index 00000000..d7045e77 --- /dev/null +++ b/test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json @@ -0,0 +1,348 @@ +{ + "context": { + "date": "2025-09-07T14:28:38+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./bin/image-processing-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [3.06299,3.42188,4.38721], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "Eigen_Convolve2D/1", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Eigen_Convolve2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 19, + "real_time": 3.6139344306368578e+01, + "cpu_time": 3.6132733684210528e+01, + "time_unit": "ms" + }, + { + "name": "MLIR_Conv2D/1", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "MLIR_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 6, + "real_time": 1.2281656203170617e+02, + "cpu_time": 1.2280101033333335e+02, + "time_unit": "ms" + }, + { + "name": "Buddy_Conv2D/1", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "Buddy_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 174, + "real_time": 4.0128050136497651e+00, + "cpu_time": 4.0122547816091965e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Corr2D_Constant_Padding/1", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "Buddy_Corr2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 86, + "real_time": 8.0886327683232544e+00, + "cpu_time": 8.0876173023255777e+00, + "time_unit": "ms" + }, + { + "name": "OpenCV_Filter2D_Constant_Padding/1", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "OpenCV_Filter2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 116, + "real_time": 6.0190088276205396e+00, + "cpu_time": 6.0181940862068943e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 5, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4751, + "real_time": 1.4789280477912017e-01, + "cpu_time": 1.4787357756261846e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2581, + "real_time": 2.7365781617968271e-01, + "cpu_time": 2.7361916388996527e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 7, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 101075, + "real_time": 6.9230752152851715e-03, + "cpu_time": 6.9229836655948533e-03, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "family_index": 8, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 47881, + "real_time": 1.4611373583860823e-02, + "cpu_time": 1.4611187402101052e-02, + "time_unit": "ms" + }, + { + "name": "Buddy_Erosion2D_Constant_Padding/1", + "family_index": 9, + "per_family_instance_index": 0, + "run_name": "Buddy_Erosion2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2510, + "real_time": 3.0442776225240109e-01, + "cpu_time": 3.0439000358565693e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Dilation2D_Constant_Padding/1", + "family_index": 10, + "per_family_instance_index": 0, + "run_name": "Buddy_Dilation2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2091, + "real_time": 2.6490774481334167e-01, + "cpu_time": 2.6486507604017184e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Opening2D_Constant_Padding/1", + "family_index": 11, + "per_family_instance_index": 0, + "run_name": "Buddy_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1262, + "real_time": 4.7815910196625488e-01, + "cpu_time": 4.7810812282091797e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Closing2D_Constant_Padding/1", + "family_index": 12, + "per_family_instance_index": 0, + "run_name": "Buddy_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1152, + "real_time": 4.8954437241061693e-01, + "cpu_time": 4.8945949305555547e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_TopHat2D_Constant_Padding/1", + "family_index": 13, + "per_family_instance_index": 0, + "run_name": "Buddy_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 713, + "real_time": 9.2543011627892791e-01, + "cpu_time": 9.2528858064516162e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_BottomHat2D_Constant_Padding/1", + "family_index": 14, + "per_family_instance_index": 0, + "run_name": "Buddy_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 733, + "real_time": 9.2815726710862057e-01, + "cpu_time": 9.2803548840382022e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Erode2D_Constant_Padding/1", + "family_index": 15, + "per_family_instance_index": 0, + "run_name": "OpenCV_Erode2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4956, + "real_time": 1.4103952227002484e-01, + "cpu_time": 1.4102089689265529e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Opening2D_Constant_Padding/1", + "family_index": 16, + "per_family_instance_index": 0, + "run_name": "OpenCV_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3097, + "real_time": 2.2641591508811160e-01, + "cpu_time": 2.2638668517920651e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Closing2D_Constant_Padding/1", + "family_index": 17, + "per_family_instance_index": 0, + "run_name": "OpenCV_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3063, + "real_time": 2.2829655792601858e-01, + "cpu_time": 2.2829427815866776e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_TopHat2D_Constant_Padding/1", + "family_index": 18, + "per_family_instance_index": 0, + "run_name": "OpenCV_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2628, + "real_time": 2.6419852495964624e-01, + "cpu_time": 2.6419667199391139e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_BottomHat2D_Constant_Padding/1", + "family_index": 19, + "per_family_instance_index": 0, + "run_name": "OpenCV_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2623, + "real_time": 2.6604910979414548e-01, + "cpu_time": 2.6604722645825318e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "family_index": 20, + "per_family_instance_index": 0, + "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2690, + "real_time": 2.6090611395561120e-01, + "cpu_time": 2.6090426319702570e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Dilate2D_Constant_Padding/1", + "family_index": 21, + "per_family_instance_index": 0, + "run_name": "OpenCV_Dilate2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4952, + "real_time": 1.4145706474829181e-01, + "cpu_time": 1.4145561409531551e-01, + "time_unit": "ms" + } + ] +} diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log b/test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log new file mode 100644 index 00000000..34c65b95 --- /dev/null +++ b/test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log @@ -0,0 +1,56 @@ +2025-09-07T14:28:38+00:00 +Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 3.06, 3.42, 4.39 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 36.1 ms 36.1 ms 19 +MLIR_Conv2D/1 123 ms 123 ms 6 +Buddy_Conv2D/1 4.01 ms 4.01 ms 174 +Buddy_Corr2D_Constant_Padding/1 8.09 ms 8.09 ms 86 +OpenCV_Filter2D_Constant_Padding/1 6.02 ms 6.02 ms 116 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.148 ms 0.148 ms 4751 +Buddy_Resize2D_Bilinear_Interpolation/1 0.274 ms 0.274 ms 2581 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 101075 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47881 +Buddy_Erosion2D_Constant_Padding/1 0.304 ms 0.304 ms 2510 +Buddy_Dilation2D_Constant_Padding/1 0.265 ms 0.265 ms 2091 +Buddy_Opening2D_Constant_Padding/1 0.478 ms 0.478 ms 1262 +Buddy_Closing2D_Constant_Padding/1 0.490 ms 0.489 ms 1152 +Buddy_TopHat2D_Constant_Padding/1 0.925 ms 0.925 ms 713 +Buddy_BottomHat2D_Constant_Padding/1 0.928 ms 0.928 ms 733 +OpenCV_Erode2D_Constant_Padding/1 0.141 ms 0.141 ms 4956 +OpenCV_Opening2D_Constant_Padding/1 0.226 ms 0.226 ms 3097 +OpenCV_Closing2D_Constant_Padding/1 0.228 ms 0.228 ms 3063 +OpenCV_TopHat2D_Constant_Padding/1 0.264 ms 0.264 ms 2628 +OpenCV_BottomHat2D_Constant_Padding/1 0.266 ms 0.266 ms 2623 +OpenCV_MorphGrad2D_Constant_Padding/1 0.261 ms 0.261 ms 2690 +OpenCV_Dilate2D_Constant_Padding/1 0.141 ms 0.141 ms 4952 +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. diff --git a/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json new file mode 100644 index 00000000..538ed5ff --- /dev/null +++ b/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json @@ -0,0 +1,348 @@ +{ + "context": { + "date": "2025-09-07T14:23:19+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./bin/image-processing-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [3.03271,3.75732,4.84424], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "Eigen_Convolve2D/1", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Eigen_Convolve2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 136, + "real_time": 5.1130582030643437e+00, + "cpu_time": 5.1129978750000005e+00, + "time_unit": "ms" + }, + { + "name": "MLIR_Conv2D/1", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "MLIR_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 92, + "real_time": 7.5614458840826284e+00, + "cpu_time": 7.5612571630434795e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Conv2D/1", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "Buddy_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 991, + "real_time": 7.3911826286979931e-01, + "cpu_time": 7.3911208072653889e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Corr2D_Constant_Padding/1", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "Buddy_Corr2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 634, + "real_time": 1.0977113554812381e+00, + "cpu_time": 1.0976892066246056e+00, + "time_unit": "ms" + }, + { + "name": "OpenCV_Filter2D_Constant_Padding/1", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "OpenCV_Filter2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 359, + "real_time": 1.9464333305617894e+00, + "cpu_time": 1.9463996295264627e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 5, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4731, + "real_time": 1.4831822525368915e-01, + "cpu_time": 1.4831527478334391e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2579, + "real_time": 2.7571449409440857e-01, + "cpu_time": 2.7571217332299330e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 7, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 101112, + "real_time": 6.9151851410107082e-03, + "cpu_time": 6.9149452488329738e-03, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "family_index": 8, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 47882, + "real_time": 1.4606120489572882e-02, + "cpu_time": 1.4605831794828930e-02, + "time_unit": "ms" + }, + { + "name": "Buddy_Erosion2D_Constant_Padding/1", + "family_index": 9, + "per_family_instance_index": 0, + "run_name": "Buddy_Erosion2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2267, + "real_time": 2.9327621150237643e-01, + "cpu_time": 2.9326395059550070e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Dilation2D_Constant_Padding/1", + "family_index": 10, + "per_family_instance_index": 0, + "run_name": "Buddy_Dilation2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2325, + "real_time": 2.8085091902363685e-01, + "cpu_time": 2.8084884645161295e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Opening2D_Constant_Padding/1", + "family_index": 11, + "per_family_instance_index": 0, + "run_name": "Buddy_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1277, + "real_time": 4.9499843850113401e-01, + "cpu_time": 4.9497863508222334e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Closing2D_Constant_Padding/1", + "family_index": 12, + "per_family_instance_index": 0, + "run_name": "Buddy_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1822, + "real_time": 3.4349706150697695e-01, + "cpu_time": 3.4349251042810131e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_TopHat2D_Constant_Padding/1", + "family_index": 13, + "per_family_instance_index": 0, + "run_name": "Buddy_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 705, + "real_time": 9.3753048291443086e-01, + "cpu_time": 9.3750746524822892e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_BottomHat2D_Constant_Padding/1", + "family_index": 14, + "per_family_instance_index": 0, + "run_name": "Buddy_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 717, + "real_time": 9.3463689836996866e-01, + "cpu_time": 9.3461899302649698e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Erode2D_Constant_Padding/1", + "family_index": 15, + "per_family_instance_index": 0, + "run_name": "OpenCV_Erode2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4954, + "real_time": 1.4094130591394249e-01, + "cpu_time": 1.4093779975777146e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Opening2D_Constant_Padding/1", + "family_index": 16, + "per_family_instance_index": 0, + "run_name": "OpenCV_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3148, + "real_time": 2.2180359976264391e-01, + "cpu_time": 2.2180045520965658e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Closing2D_Constant_Padding/1", + "family_index": 17, + "per_family_instance_index": 0, + "run_name": "OpenCV_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3189, + "real_time": 2.1949756099685078e-01, + "cpu_time": 2.1949316149263115e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_TopHat2D_Constant_Padding/1", + "family_index": 18, + "per_family_instance_index": 0, + "run_name": "OpenCV_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2736, + "real_time": 2.5490349926577327e-01, + "cpu_time": 2.5490010709064376e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_BottomHat2D_Constant_Padding/1", + "family_index": 19, + "per_family_instance_index": 0, + "run_name": "OpenCV_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2737, + "real_time": 2.5427932852717877e-01, + "cpu_time": 2.5427474059188909e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "family_index": 20, + "per_family_instance_index": 0, + "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2679, + "real_time": 2.6234736879533155e-01, + "cpu_time": 2.6234529749906660e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Dilate2D_Constant_Padding/1", + "family_index": 21, + "per_family_instance_index": 0, + "run_name": "OpenCV_Dilate2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4852, + "real_time": 1.4455454318069369e-01, + "cpu_time": 1.4455208182192936e-01, + "time_unit": "ms" + } + ] +} diff --git a/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log b/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log new file mode 100644 index 00000000..bbdcfd5a --- /dev/null +++ b/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log @@ -0,0 +1,56 @@ +2025-09-07T14:23:19+00:00 +Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 3.03, 3.76, 4.84 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 5.11 ms 5.11 ms 136 +MLIR_Conv2D/1 7.56 ms 7.56 ms 92 +Buddy_Conv2D/1 0.739 ms 0.739 ms 991 +Buddy_Corr2D_Constant_Padding/1 1.10 ms 1.10 ms 634 +OpenCV_Filter2D_Constant_Padding/1 1.95 ms 1.95 ms 359 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.148 ms 0.148 ms 4731 +Buddy_Resize2D_Bilinear_Interpolation/1 0.276 ms 0.276 ms 2579 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 101112 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47882 +Buddy_Erosion2D_Constant_Padding/1 0.293 ms 0.293 ms 2267 +Buddy_Dilation2D_Constant_Padding/1 0.281 ms 0.281 ms 2325 +Buddy_Opening2D_Constant_Padding/1 0.495 ms 0.495 ms 1277 +Buddy_Closing2D_Constant_Padding/1 0.343 ms 0.343 ms 1822 +Buddy_TopHat2D_Constant_Padding/1 0.938 ms 0.938 ms 705 +Buddy_BottomHat2D_Constant_Padding/1 0.935 ms 0.935 ms 717 +OpenCV_Erode2D_Constant_Padding/1 0.141 ms 0.141 ms 4954 +OpenCV_Opening2D_Constant_Padding/1 0.222 ms 0.222 ms 3148 +OpenCV_Closing2D_Constant_Padding/1 0.219 ms 0.219 ms 3189 +OpenCV_TopHat2D_Constant_Padding/1 0.255 ms 0.255 ms 2736 +OpenCV_BottomHat2D_Constant_Padding/1 0.254 ms 0.254 ms 2737 +OpenCV_MorphGrad2D_Constant_Padding/1 0.262 ms 0.262 ms 2679 +OpenCV_Dilate2D_Constant_Padding/1 0.145 ms 0.145 ms 4852 +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. diff --git a/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json new file mode 100644 index 00000000..4622ce88 --- /dev/null +++ b/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json @@ -0,0 +1,348 @@ +{ + "context": { + "date": "2025-09-07T14:23:42+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./bin/image-processing-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [3.10303,3.72461,4.80908], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "Eigen_Convolve2D/1", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Eigen_Convolve2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 140, + "real_time": 4.9881702821169585e+00, + "cpu_time": 4.9880098642857140e+00, + "time_unit": "ms" + }, + { + "name": "MLIR_Conv2D/1", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "MLIR_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 92, + "real_time": 7.5616192315583648e+00, + "cpu_time": 7.5614974891304341e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Conv2D/1", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "Buddy_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 938, + "real_time": 7.0375580984010877e-01, + "cpu_time": 7.0372593390191918e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Corr2D_Constant_Padding/1", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "Buddy_Corr2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 634, + "real_time": 1.0931943234773089e+00, + "cpu_time": 1.0931488375394325e+00, + "time_unit": "ms" + }, + { + "name": "OpenCV_Filter2D_Constant_Padding/1", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "OpenCV_Filter2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 360, + "real_time": 1.9437930236260097e+00, + "cpu_time": 1.9436920166666676e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 5, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4731, + "real_time": 1.4828980958025845e-01, + "cpu_time": 1.4828513654618480e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2575, + "real_time": 2.7519861905320175e-01, + "cpu_time": 2.7518627728155343e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 7, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 100987, + "real_time": 6.9202101019204187e-03, + "cpu_time": 6.9199823739689269e-03, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "family_index": 8, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 47878, + "real_time": 1.4626334239538314e-02, + "cpu_time": 1.4625822966707055e-02, + "time_unit": "ms" + }, + { + "name": "Buddy_Erosion2D_Constant_Padding/1", + "family_index": 9, + "per_family_instance_index": 0, + "run_name": "Buddy_Erosion2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2361, + "real_time": 2.8520150363369545e-01, + "cpu_time": 2.8518143879711966e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Dilation2D_Constant_Padding/1", + "family_index": 10, + "per_family_instance_index": 0, + "run_name": "Buddy_Dilation2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2362, + "real_time": 3.0680066550262491e-01, + "cpu_time": 3.0677437510584199e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Opening2D_Constant_Padding/1", + "family_index": 11, + "per_family_instance_index": 0, + "run_name": "Buddy_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1000, + "real_time": 6.1990195512771606e-01, + "cpu_time": 6.1985857099999997e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Closing2D_Constant_Padding/1", + "family_index": 12, + "per_family_instance_index": 0, + "run_name": "Buddy_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1712, + "real_time": 3.8910013945581756e-01, + "cpu_time": 3.8908160338785042e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_TopHat2D_Constant_Padding/1", + "family_index": 13, + "per_family_instance_index": 0, + "run_name": "Buddy_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 698, + "real_time": 9.5385507304210715e-01, + "cpu_time": 9.5382185100286465e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_BottomHat2D_Constant_Padding/1", + "family_index": 14, + "per_family_instance_index": 0, + "run_name": "Buddy_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 712, + "real_time": 9.5589765546362049e-01, + "cpu_time": 9.5589013202247197e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Erode2D_Constant_Padding/1", + "family_index": 15, + "per_family_instance_index": 0, + "run_name": "OpenCV_Erode2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4941, + "real_time": 1.4135648417849214e-01, + "cpu_time": 1.4134417870876342e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Opening2D_Constant_Padding/1", + "family_index": 16, + "per_family_instance_index": 0, + "run_name": "OpenCV_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3094, + "real_time": 2.2559705339059108e-01, + "cpu_time": 2.2558599385908198e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Closing2D_Constant_Padding/1", + "family_index": 17, + "per_family_instance_index": 0, + "run_name": "OpenCV_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3097, + "real_time": 2.2691802094934985e-01, + "cpu_time": 2.2690593283823002e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_TopHat2D_Constant_Padding/1", + "family_index": 18, + "per_family_instance_index": 0, + "run_name": "OpenCV_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2642, + "real_time": 2.6526975668938568e-01, + "cpu_time": 2.6526233232399676e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_BottomHat2D_Constant_Padding/1", + "family_index": 19, + "per_family_instance_index": 0, + "run_name": "OpenCV_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2635, + "real_time": 2.6353473653150916e-01, + "cpu_time": 2.6351631650853868e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "family_index": 20, + "per_family_instance_index": 0, + "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2666, + "real_time": 2.6146747557542777e-01, + "cpu_time": 2.6145232070517682e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Dilate2D_Constant_Padding/1", + "family_index": 21, + "per_family_instance_index": 0, + "run_name": "OpenCV_Dilate2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4900, + "real_time": 1.4341449479059298e-01, + "cpu_time": 1.4340792591836732e-01, + "time_unit": "ms" + } + ] +} diff --git a/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log b/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log new file mode 100644 index 00000000..d5b3ef25 --- /dev/null +++ b/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log @@ -0,0 +1,56 @@ +2025-09-07T14:23:42+00:00 +Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 3.10, 3.72, 4.81 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 4.99 ms 4.99 ms 140 +MLIR_Conv2D/1 7.56 ms 7.56 ms 92 +Buddy_Conv2D/1 0.704 ms 0.704 ms 938 +Buddy_Corr2D_Constant_Padding/1 1.09 ms 1.09 ms 634 +OpenCV_Filter2D_Constant_Padding/1 1.94 ms 1.94 ms 360 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.148 ms 0.148 ms 4731 +Buddy_Resize2D_Bilinear_Interpolation/1 0.275 ms 0.275 ms 2575 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 100987 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47878 +Buddy_Erosion2D_Constant_Padding/1 0.285 ms 0.285 ms 2361 +Buddy_Dilation2D_Constant_Padding/1 0.307 ms 0.307 ms 2362 +Buddy_Opening2D_Constant_Padding/1 0.620 ms 0.620 ms 1000 +Buddy_Closing2D_Constant_Padding/1 0.389 ms 0.389 ms 1712 +Buddy_TopHat2D_Constant_Padding/1 0.954 ms 0.954 ms 698 +Buddy_BottomHat2D_Constant_Padding/1 0.956 ms 0.956 ms 712 +OpenCV_Erode2D_Constant_Padding/1 0.141 ms 0.141 ms 4941 +OpenCV_Opening2D_Constant_Padding/1 0.226 ms 0.226 ms 3094 +OpenCV_Closing2D_Constant_Padding/1 0.227 ms 0.227 ms 3097 +OpenCV_TopHat2D_Constant_Padding/1 0.265 ms 0.265 ms 2642 +OpenCV_BottomHat2D_Constant_Padding/1 0.264 ms 0.264 ms 2635 +OpenCV_MorphGrad2D_Constant_Padding/1 0.261 ms 0.261 ms 2666 +OpenCV_Dilate2D_Constant_Padding/1 0.143 ms 0.143 ms 4900 +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. diff --git a/test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json new file mode 100644 index 00000000..aea059fb --- /dev/null +++ b/test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json @@ -0,0 +1,348 @@ +{ + "context": { + "date": "2025-09-07T14:24:06+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./bin/image-processing-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [3.06689,3.66504,4.76025], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "Eigen_Convolve2D/1", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Eigen_Convolve2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 58, + "real_time": 1.1998714814926016e+01, + "cpu_time": 1.1997443879310346e+01, + "time_unit": "ms" + }, + { + "name": "MLIR_Conv2D/1", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "MLIR_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 23, + "real_time": 3.0360637961522393e+01, + "cpu_time": 3.0356734521739146e+01, + "time_unit": "ms" + }, + { + "name": "Buddy_Conv2D/1", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "Buddy_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 322, + "real_time": 2.1898058237311262e+00, + "cpu_time": 2.1895025559006216e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Corr2D_Constant_Padding/1", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "Buddy_Corr2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 377, + "real_time": 1.8404441384168773e+00, + "cpu_time": 1.8402098037135266e+00, + "time_unit": "ms" + }, + { + "name": "OpenCV_Filter2D_Constant_Padding/1", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "OpenCV_Filter2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 248, + "real_time": 2.8030298680307402e+00, + "cpu_time": 2.8026356330645159e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 5, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4741, + "real_time": 1.4848858149487670e-01, + "cpu_time": 1.4846917338114329e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2547, + "real_time": 2.7368638209609553e-01, + "cpu_time": 2.7364592854338443e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 7, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 101088, + "real_time": 6.9170411064131193e-03, + "cpu_time": 6.9161112792814165e-03, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "family_index": 8, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 47818, + "real_time": 1.4660072789606434e-02, + "cpu_time": 1.4659816428959836e-02, + "time_unit": "ms" + }, + { + "name": "Buddy_Erosion2D_Constant_Padding/1", + "family_index": 9, + "per_family_instance_index": 0, + "run_name": "Buddy_Erosion2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2386, + "real_time": 2.8420215330499615e-01, + "cpu_time": 2.8416078960603486e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Dilation2D_Constant_Padding/1", + "family_index": 10, + "per_family_instance_index": 0, + "run_name": "Buddy_Dilation2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2544, + "real_time": 2.8090887269555770e-01, + "cpu_time": 2.8086637106918216e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Opening2D_Constant_Padding/1", + "family_index": 11, + "per_family_instance_index": 0, + "run_name": "Buddy_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1000, + "real_time": 6.0656908899545670e-01, + "cpu_time": 6.0655633599999881e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Closing2D_Constant_Padding/1", + "family_index": 12, + "per_family_instance_index": 0, + "run_name": "Buddy_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1866, + "real_time": 3.7647417416079382e-01, + "cpu_time": 3.7641147481243248e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_TopHat2D_Constant_Padding/1", + "family_index": 13, + "per_family_instance_index": 0, + "run_name": "Buddy_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 737, + "real_time": 9.6684982406235775e-01, + "cpu_time": 9.6672385888738199e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_BottomHat2D_Constant_Padding/1", + "family_index": 14, + "per_family_instance_index": 0, + "run_name": "Buddy_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 692, + "real_time": 9.6456302232997271e-01, + "cpu_time": 9.6441563439306299e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Erode2D_Constant_Padding/1", + "family_index": 15, + "per_family_instance_index": 0, + "run_name": "OpenCV_Erode2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4842, + "real_time": 1.4419430610637241e-01, + "cpu_time": 1.4419237773647245e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Opening2D_Constant_Padding/1", + "family_index": 16, + "per_family_instance_index": 0, + "run_name": "OpenCV_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3046, + "real_time": 2.3022636120560447e-01, + "cpu_time": 2.3022371799080765e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Closing2D_Constant_Padding/1", + "family_index": 17, + "per_family_instance_index": 0, + "run_name": "OpenCV_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3026, + "real_time": 2.3162924122589751e-01, + "cpu_time": 2.3162562723066799e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_TopHat2D_Constant_Padding/1", + "family_index": 18, + "per_family_instance_index": 0, + "run_name": "OpenCV_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2617, + "real_time": 2.6727178663208961e-01, + "cpu_time": 2.6726527283148621e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_BottomHat2D_Constant_Padding/1", + "family_index": 19, + "per_family_instance_index": 0, + "run_name": "OpenCV_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2605, + "real_time": 2.6782371547080275e-01, + "cpu_time": 2.6782162571976953e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "family_index": 20, + "per_family_instance_index": 0, + "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2647, + "real_time": 2.6472585017518091e-01, + "cpu_time": 2.6472381412920259e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Dilate2D_Constant_Padding/1", + "family_index": 21, + "per_family_instance_index": 0, + "run_name": "OpenCV_Dilate2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4880, + "real_time": 1.4423252938345807e-01, + "cpu_time": 1.4421351024590140e-01, + "time_unit": "ms" + } + ] +} diff --git a/test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log b/test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log new file mode 100644 index 00000000..91bbb2d4 --- /dev/null +++ b/test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log @@ -0,0 +1,56 @@ +2025-09-07T14:24:06+00:00 +Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 3.07, 3.67, 4.76 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 12.0 ms 12.0 ms 58 +MLIR_Conv2D/1 30.4 ms 30.4 ms 23 +Buddy_Conv2D/1 2.19 ms 2.19 ms 322 +Buddy_Corr2D_Constant_Padding/1 1.84 ms 1.84 ms 377 +OpenCV_Filter2D_Constant_Padding/1 2.80 ms 2.80 ms 248 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.148 ms 0.148 ms 4741 +Buddy_Resize2D_Bilinear_Interpolation/1 0.274 ms 0.274 ms 2547 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 101088 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47818 +Buddy_Erosion2D_Constant_Padding/1 0.284 ms 0.284 ms 2386 +Buddy_Dilation2D_Constant_Padding/1 0.281 ms 0.281 ms 2544 +Buddy_Opening2D_Constant_Padding/1 0.607 ms 0.607 ms 1000 +Buddy_Closing2D_Constant_Padding/1 0.376 ms 0.376 ms 1866 +Buddy_TopHat2D_Constant_Padding/1 0.967 ms 0.967 ms 737 +Buddy_BottomHat2D_Constant_Padding/1 0.965 ms 0.964 ms 692 +OpenCV_Erode2D_Constant_Padding/1 0.144 ms 0.144 ms 4842 +OpenCV_Opening2D_Constant_Padding/1 0.230 ms 0.230 ms 3046 +OpenCV_Closing2D_Constant_Padding/1 0.232 ms 0.232 ms 3026 +OpenCV_TopHat2D_Constant_Padding/1 0.267 ms 0.267 ms 2617 +OpenCV_BottomHat2D_Constant_Padding/1 0.268 ms 0.268 ms 2605 +OpenCV_MorphGrad2D_Constant_Padding/1 0.265 ms 0.265 ms 2647 +OpenCV_Dilate2D_Constant_Padding/1 0.144 ms 0.144 ms 4880 +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. diff --git a/test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json new file mode 100644 index 00000000..24206b65 --- /dev/null +++ b/test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json @@ -0,0 +1,348 @@ +{ + "context": { + "date": "2025-09-07T14:24:30+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./bin/image-processing-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [3.04297,3.61035,4.71191], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "Eigen_Convolve2D/1", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Eigen_Convolve2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 58, + "real_time": 1.2075262786499385e+01, + "cpu_time": 1.2073443568965518e+01, + "time_unit": "ms" + }, + { + "name": "MLIR_Conv2D/1", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "MLIR_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 23, + "real_time": 3.0510915362316631e+01, + "cpu_time": 3.0506920913043473e+01, + "time_unit": "ms" + }, + { + "name": "Buddy_Conv2D/1", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "Buddy_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 314, + "real_time": 2.1884666672747608e+00, + "cpu_time": 2.1881547292993626e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Corr2D_Constant_Padding/1", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "Buddy_Corr2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 381, + "real_time": 1.8304030354723844e+00, + "cpu_time": 1.8301742624671924e+00, + "time_unit": "ms" + }, + { + "name": "OpenCV_Filter2D_Constant_Padding/1", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "OpenCV_Filter2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 250, + "real_time": 2.8018697649240494e+00, + "cpu_time": 2.8014706559999993e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 5, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4721, + "real_time": 1.4815152991855834e-01, + "cpu_time": 1.4813229760643945e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2535, + "real_time": 2.7588403965594499e-01, + "cpu_time": 2.7584648875739659e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 7, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 101044, + "real_time": 6.9154953151760795e-03, + "cpu_time": 6.9154101480543182e-03, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "family_index": 8, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 47923, + "real_time": 1.4632370029027023e-02, + "cpu_time": 1.4632033240823825e-02, + "time_unit": "ms" + }, + { + "name": "Buddy_Erosion2D_Constant_Padding/1", + "family_index": 9, + "per_family_instance_index": 0, + "run_name": "Buddy_Erosion2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1991, + "real_time": 3.2995011431077570e-01, + "cpu_time": 3.2994335409342052e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Dilation2D_Constant_Padding/1", + "family_index": 10, + "per_family_instance_index": 0, + "run_name": "Buddy_Dilation2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1915, + "real_time": 2.8319770066607403e-01, + "cpu_time": 2.8319159164490931e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Opening2D_Constant_Padding/1", + "family_index": 11, + "per_family_instance_index": 0, + "run_name": "Buddy_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1138, + "real_time": 5.3577692049877923e-01, + "cpu_time": 5.3576669420035217e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Closing2D_Constant_Padding/1", + "family_index": 12, + "per_family_instance_index": 0, + "run_name": "Buddy_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1124, + "real_time": 4.9952752357912233e-01, + "cpu_time": 4.9943513434163772e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_TopHat2D_Constant_Padding/1", + "family_index": 13, + "per_family_instance_index": 0, + "run_name": "Buddy_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 719, + "real_time": 9.1689284657735648e-01, + "cpu_time": 9.1676078859526888e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_BottomHat2D_Constant_Padding/1", + "family_index": 14, + "per_family_instance_index": 0, + "run_name": "Buddy_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 695, + "real_time": 9.6813468731564578e-01, + "cpu_time": 9.6800758705035905e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Erode2D_Constant_Padding/1", + "family_index": 15, + "per_family_instance_index": 0, + "run_name": "OpenCV_Erode2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4798, + "real_time": 1.4492751205151355e-01, + "cpu_time": 1.4491701792413500e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Opening2D_Constant_Padding/1", + "family_index": 16, + "per_family_instance_index": 0, + "run_name": "OpenCV_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2989, + "real_time": 2.3426035498248649e-01, + "cpu_time": 2.3425851689528218e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Closing2D_Constant_Padding/1", + "family_index": 17, + "per_family_instance_index": 0, + "run_name": "OpenCV_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2969, + "real_time": 2.3408840153505603e-01, + "cpu_time": 2.3408375345234064e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_TopHat2D_Constant_Padding/1", + "family_index": 18, + "per_family_instance_index": 0, + "run_name": "OpenCV_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2614, + "real_time": 2.6729449133838329e-01, + "cpu_time": 2.6729081216526446e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_BottomHat2D_Constant_Padding/1", + "family_index": 19, + "per_family_instance_index": 0, + "run_name": "OpenCV_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2630, + "real_time": 2.6423307454404721e-01, + "cpu_time": 2.6422895171102773e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "family_index": 20, + "per_family_instance_index": 0, + "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2626, + "real_time": 2.7601832562244671e-01, + "cpu_time": 2.7601385605483625e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Dilate2D_Constant_Padding/1", + "family_index": 21, + "per_family_instance_index": 0, + "run_name": "OpenCV_Dilate2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4881, + "real_time": 1.4363530366129135e-01, + "cpu_time": 1.4361626715836887e-01, + "time_unit": "ms" + } + ] +} diff --git a/test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log b/test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log new file mode 100644 index 00000000..bba83998 --- /dev/null +++ b/test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log @@ -0,0 +1,56 @@ +2025-09-07T14:24:30+00:00 +Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 3.04, 3.61, 4.71 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 12.1 ms 12.1 ms 58 +MLIR_Conv2D/1 30.5 ms 30.5 ms 23 +Buddy_Conv2D/1 2.19 ms 2.19 ms 314 +Buddy_Corr2D_Constant_Padding/1 1.83 ms 1.83 ms 381 +OpenCV_Filter2D_Constant_Padding/1 2.80 ms 2.80 ms 250 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.148 ms 0.148 ms 4721 +Buddy_Resize2D_Bilinear_Interpolation/1 0.276 ms 0.276 ms 2535 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 101044 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47923 +Buddy_Erosion2D_Constant_Padding/1 0.330 ms 0.330 ms 1991 +Buddy_Dilation2D_Constant_Padding/1 0.283 ms 0.283 ms 1915 +Buddy_Opening2D_Constant_Padding/1 0.536 ms 0.536 ms 1138 +Buddy_Closing2D_Constant_Padding/1 0.500 ms 0.499 ms 1124 +Buddy_TopHat2D_Constant_Padding/1 0.917 ms 0.917 ms 719 +Buddy_BottomHat2D_Constant_Padding/1 0.968 ms 0.968 ms 695 +OpenCV_Erode2D_Constant_Padding/1 0.145 ms 0.145 ms 4798 +OpenCV_Opening2D_Constant_Padding/1 0.234 ms 0.234 ms 2989 +OpenCV_Closing2D_Constant_Padding/1 0.234 ms 0.234 ms 2969 +OpenCV_TopHat2D_Constant_Padding/1 0.267 ms 0.267 ms 2614 +OpenCV_BottomHat2D_Constant_Padding/1 0.264 ms 0.264 ms 2630 +OpenCV_MorphGrad2D_Constant_Padding/1 0.276 ms 0.276 ms 2626 +OpenCV_Dilate2D_Constant_Padding/1 0.144 ms 0.144 ms 4881 +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. diff --git a/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json new file mode 100644 index 00000000..73c6c792 --- /dev/null +++ b/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json @@ -0,0 +1,348 @@ +{ + "context": { + "date": "2025-09-07T14:19:23+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./bin/image-processing-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [4.16504,4.64014,5.37695], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "Eigen_Convolve2D/1", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Eigen_Convolve2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 139, + "real_time": 5.0173752385077712e+00, + "cpu_time": 5.0171431366906489e+00, + "time_unit": "ms" + }, + { + "name": "MLIR_Conv2D/1", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "MLIR_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 93, + "real_time": 7.5428272367164650e+00, + "cpu_time": 7.5426328494623673e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Conv2D/1", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "Buddy_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1245, + "real_time": 5.5684779123608841e-01, + "cpu_time": 5.5682440080321294e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Corr2D_Constant_Padding/1", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "Buddy_Corr2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 828, + "real_time": 8.4202265555875888e-01, + "cpu_time": 8.4200422584541035e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Filter2D_Constant_Padding/1", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "OpenCV_Filter2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 531, + "real_time": 1.3090186522617628e+00, + "cpu_time": 1.3089821713747642e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 5, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4709, + "real_time": 1.4896581911007850e-01, + "cpu_time": 1.4896079018899980e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2577, + "real_time": 2.7260999168172845e-01, + "cpu_time": 2.7260413853317822e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 7, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 100412, + "real_time": 6.9631285576930929e-03, + "cpu_time": 6.9626375632394480e-03, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "family_index": 8, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 47961, + "real_time": 1.4606321100466758e-02, + "cpu_time": 1.4605708992723267e-02, + "time_unit": "ms" + }, + { + "name": "Buddy_Erosion2D_Constant_Padding/1", + "family_index": 9, + "per_family_instance_index": 0, + "run_name": "Buddy_Erosion2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2105, + "real_time": 3.0771779369854868e-01, + "cpu_time": 3.0768270926365782e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Dilation2D_Constant_Padding/1", + "family_index": 10, + "per_family_instance_index": 0, + "run_name": "Buddy_Dilation2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2485, + "real_time": 3.0552347601059698e-01, + "cpu_time": 3.0551694647887367e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Opening2D_Constant_Padding/1", + "family_index": 11, + "per_family_instance_index": 0, + "run_name": "Buddy_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1103, + "real_time": 5.5804573411522185e-01, + "cpu_time": 5.5798702266545741e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Closing2D_Constant_Padding/1", + "family_index": 12, + "per_family_instance_index": 0, + "run_name": "Buddy_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1192, + "real_time": 5.6027826984356710e-01, + "cpu_time": 5.6026445553691262e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_TopHat2D_Constant_Padding/1", + "family_index": 13, + "per_family_instance_index": 0, + "run_name": "Buddy_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 698, + "real_time": 9.4223976199299009e-01, + "cpu_time": 9.4221851719197713e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_BottomHat2D_Constant_Padding/1", + "family_index": 14, + "per_family_instance_index": 0, + "run_name": "Buddy_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 710, + "real_time": 9.8398026656097093e-01, + "cpu_time": 9.8392653380281692e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Erode2D_Constant_Padding/1", + "family_index": 15, + "per_family_instance_index": 0, + "run_name": "OpenCV_Erode2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4902, + "real_time": 1.4306782463262246e-01, + "cpu_time": 1.4306102203182383e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Opening2D_Constant_Padding/1", + "family_index": 16, + "per_family_instance_index": 0, + "run_name": "OpenCV_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3075, + "real_time": 2.2767367886333931e-01, + "cpu_time": 2.2766505788617861e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Closing2D_Constant_Padding/1", + "family_index": 17, + "per_family_instance_index": 0, + "run_name": "OpenCV_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3093, + "real_time": 2.2700202289692917e-01, + "cpu_time": 2.2700032945360529e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_TopHat2D_Constant_Padding/1", + "family_index": 18, + "per_family_instance_index": 0, + "run_name": "OpenCV_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2641, + "real_time": 2.6595405740568195e-01, + "cpu_time": 2.6595034721696315e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_BottomHat2D_Constant_Padding/1", + "family_index": 19, + "per_family_instance_index": 0, + "run_name": "OpenCV_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2644, + "real_time": 2.6620356380127924e-01, + "cpu_time": 2.6620060287443309e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "family_index": 20, + "per_family_instance_index": 0, + "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2674, + "real_time": 2.6205028271443287e-01, + "cpu_time": 2.6204837621540705e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Dilate2D_Constant_Padding/1", + "family_index": 21, + "per_family_instance_index": 0, + "run_name": "OpenCV_Dilate2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4913, + "real_time": 1.4280434565172404e-01, + "cpu_time": 1.4280282454712012e-01, + "time_unit": "ms" + } + ] +} diff --git a/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log b/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log new file mode 100644 index 00000000..a978a72e --- /dev/null +++ b/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log @@ -0,0 +1,56 @@ +2025-09-07T14:19:23+00:00 +Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 4.17, 4.64, 5.38 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 5.02 ms 5.02 ms 139 +MLIR_Conv2D/1 7.54 ms 7.54 ms 93 +Buddy_Conv2D/1 0.557 ms 0.557 ms 1245 +Buddy_Corr2D_Constant_Padding/1 0.842 ms 0.842 ms 828 +OpenCV_Filter2D_Constant_Padding/1 1.31 ms 1.31 ms 531 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.149 ms 0.149 ms 4709 +Buddy_Resize2D_Bilinear_Interpolation/1 0.273 ms 0.273 ms 2577 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 100412 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47961 +Buddy_Erosion2D_Constant_Padding/1 0.308 ms 0.308 ms 2105 +Buddy_Dilation2D_Constant_Padding/1 0.306 ms 0.306 ms 2485 +Buddy_Opening2D_Constant_Padding/1 0.558 ms 0.558 ms 1103 +Buddy_Closing2D_Constant_Padding/1 0.560 ms 0.560 ms 1192 +Buddy_TopHat2D_Constant_Padding/1 0.942 ms 0.942 ms 698 +Buddy_BottomHat2D_Constant_Padding/1 0.984 ms 0.984 ms 710 +OpenCV_Erode2D_Constant_Padding/1 0.143 ms 0.143 ms 4902 +OpenCV_Opening2D_Constant_Padding/1 0.228 ms 0.228 ms 3075 +OpenCV_Closing2D_Constant_Padding/1 0.227 ms 0.227 ms 3093 +OpenCV_TopHat2D_Constant_Padding/1 0.266 ms 0.266 ms 2641 +OpenCV_BottomHat2D_Constant_Padding/1 0.266 ms 0.266 ms 2644 +OpenCV_MorphGrad2D_Constant_Padding/1 0.262 ms 0.262 ms 2674 +OpenCV_Dilate2D_Constant_Padding/1 0.143 ms 0.143 ms 4913 +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. diff --git a/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json new file mode 100644 index 00000000..a7d1c605 --- /dev/null +++ b/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json @@ -0,0 +1,348 @@ +{ + "context": { + "date": "2025-09-07T14:19:46+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./bin/image-processing-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [3.8335,4.5332,5.3252], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "Eigen_Convolve2D/1", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Eigen_Convolve2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 138, + "real_time": 5.0004229111515954e+00, + "cpu_time": 5.0002145289855076e+00, + "time_unit": "ms" + }, + { + "name": "MLIR_Conv2D/1", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "MLIR_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 92, + "real_time": 7.6372407214797065e+00, + "cpu_time": 7.6370552065217412e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Conv2D/1", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "Buddy_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1355, + "real_time": 5.2936936821444891e-01, + "cpu_time": 5.2935688634686340e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Corr2D_Constant_Padding/1", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "Buddy_Corr2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 842, + "real_time": 8.3820865159929503e-01, + "cpu_time": 8.3819253444180519e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Filter2D_Constant_Padding/1", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "OpenCV_Filter2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 532, + "real_time": 1.3121523915376878e+00, + "cpu_time": 1.3121226522556384e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 5, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4690, + "real_time": 1.4891786330036008e-01, + "cpu_time": 1.4891265415778249e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2560, + "real_time": 2.7395108627388254e-01, + "cpu_time": 2.7393973632812496e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 7, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 100948, + "real_time": 6.9163674436630357e-03, + "cpu_time": 6.9160762570828491e-03, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "family_index": 8, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 47793, + "real_time": 1.4607901661653386e-02, + "cpu_time": 1.4607340426422288e-02, + "time_unit": "ms" + }, + { + "name": "Buddy_Erosion2D_Constant_Padding/1", + "family_index": 9, + "per_family_instance_index": 0, + "run_name": "Buddy_Erosion2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2111, + "real_time": 2.9704664370167710e-01, + "cpu_time": 2.9704162955945018e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Dilation2D_Constant_Padding/1", + "family_index": 10, + "per_family_instance_index": 0, + "run_name": "Buddy_Dilation2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2524, + "real_time": 2.9669307827618913e-01, + "cpu_time": 2.9669044453248805e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Opening2D_Constant_Padding/1", + "family_index": 11, + "per_family_instance_index": 0, + "run_name": "Buddy_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1162, + "real_time": 5.5793046040809913e-01, + "cpu_time": 5.5792592254733153e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Closing2D_Constant_Padding/1", + "family_index": 12, + "per_family_instance_index": 0, + "run_name": "Buddy_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1000, + "real_time": 5.7320801541209221e-01, + "cpu_time": 5.7317745799999997e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_TopHat2D_Constant_Padding/1", + "family_index": 13, + "per_family_instance_index": 0, + "run_name": "Buddy_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 726, + "real_time": 9.6233903376524110e-01, + "cpu_time": 9.6231244352617151e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_BottomHat2D_Constant_Padding/1", + "family_index": 14, + "per_family_instance_index": 0, + "run_name": "Buddy_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 698, + "real_time": 9.5446288244088950e-01, + "cpu_time": 9.5444632521490114e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Erode2D_Constant_Padding/1", + "family_index": 15, + "per_family_instance_index": 0, + "run_name": "OpenCV_Erode2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4912, + "real_time": 1.4231482092958514e-01, + "cpu_time": 1.4231082552931573e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Opening2D_Constant_Padding/1", + "family_index": 16, + "per_family_instance_index": 0, + "run_name": "OpenCV_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3094, + "real_time": 2.2435946595923084e-01, + "cpu_time": 2.2434201357466077e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Closing2D_Constant_Padding/1", + "family_index": 17, + "per_family_instance_index": 0, + "run_name": "OpenCV_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3059, + "real_time": 2.2924085333090899e-01, + "cpu_time": 2.2922583458646567e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_TopHat2D_Constant_Padding/1", + "family_index": 18, + "per_family_instance_index": 0, + "run_name": "OpenCV_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2607, + "real_time": 2.6636522107532212e-01, + "cpu_time": 2.6635171461449875e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_BottomHat2D_Constant_Padding/1", + "family_index": 19, + "per_family_instance_index": 0, + "run_name": "OpenCV_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2651, + "real_time": 2.6092289197629281e-01, + "cpu_time": 2.6090935382874364e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "family_index": 20, + "per_family_instance_index": 0, + "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2641, + "real_time": 2.6426135376518939e-01, + "cpu_time": 2.6424787012495315e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Dilate2D_Constant_Padding/1", + "family_index": 21, + "per_family_instance_index": 0, + "run_name": "OpenCV_Dilate2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4878, + "real_time": 1.4334160020455028e-01, + "cpu_time": 1.4333910229602290e-01, + "time_unit": "ms" + } + ] +} diff --git a/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log b/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log new file mode 100644 index 00000000..5f275baa --- /dev/null +++ b/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log @@ -0,0 +1,56 @@ +2025-09-07T14:19:46+00:00 +Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 3.83, 4.53, 5.33 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 5.00 ms 5.00 ms 138 +MLIR_Conv2D/1 7.64 ms 7.64 ms 92 +Buddy_Conv2D/1 0.529 ms 0.529 ms 1355 +Buddy_Corr2D_Constant_Padding/1 0.838 ms 0.838 ms 842 +OpenCV_Filter2D_Constant_Padding/1 1.31 ms 1.31 ms 532 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.149 ms 0.149 ms 4690 +Buddy_Resize2D_Bilinear_Interpolation/1 0.274 ms 0.274 ms 2560 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 100948 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47793 +Buddy_Erosion2D_Constant_Padding/1 0.297 ms 0.297 ms 2111 +Buddy_Dilation2D_Constant_Padding/1 0.297 ms 0.297 ms 2524 +Buddy_Opening2D_Constant_Padding/1 0.558 ms 0.558 ms 1162 +Buddy_Closing2D_Constant_Padding/1 0.573 ms 0.573 ms 1000 +Buddy_TopHat2D_Constant_Padding/1 0.962 ms 0.962 ms 726 +Buddy_BottomHat2D_Constant_Padding/1 0.954 ms 0.954 ms 698 +OpenCV_Erode2D_Constant_Padding/1 0.142 ms 0.142 ms 4912 +OpenCV_Opening2D_Constant_Padding/1 0.224 ms 0.224 ms 3094 +OpenCV_Closing2D_Constant_Padding/1 0.229 ms 0.229 ms 3059 +OpenCV_TopHat2D_Constant_Padding/1 0.266 ms 0.266 ms 2607 +OpenCV_BottomHat2D_Constant_Padding/1 0.261 ms 0.261 ms 2651 +OpenCV_MorphGrad2D_Constant_Padding/1 0.264 ms 0.264 ms 2641 +OpenCV_Dilate2D_Constant_Padding/1 0.143 ms 0.143 ms 4878 +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. diff --git a/test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json new file mode 100644 index 00000000..0bdbe2b4 --- /dev/null +++ b/test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json @@ -0,0 +1,348 @@ +{ + "context": { + "date": "2025-09-07T14:20:10+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./bin/image-processing-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [3.54834,4.40869,5.26172], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "Eigen_Convolve2D/1", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Eigen_Convolve2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 135, + "real_time": 5.1553568078411951e+00, + "cpu_time": 5.1551722074074089e+00, + "time_unit": "ms" + }, + { + "name": "MLIR_Conv2D/1", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "MLIR_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 92, + "real_time": 7.5749951293287072e+00, + "cpu_time": 7.5747488586956546e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Conv2D/1", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "Buddy_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1284, + "real_time": 5.3980214248565128e-01, + "cpu_time": 5.3977712928348909e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Corr2D_Constant_Padding/1", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "Buddy_Corr2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 832, + "real_time": 8.4146023996604180e-01, + "cpu_time": 8.4142216826923055e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Filter2D_Constant_Padding/1", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "OpenCV_Filter2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 533, + "real_time": 1.3132831169114103e+00, + "cpu_time": 1.3132458273921197e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 5, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4700, + "real_time": 1.4895170213694267e-01, + "cpu_time": 1.4894422382978711e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2571, + "real_time": 2.7564828269957381e-01, + "cpu_time": 2.7563723648385829e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 7, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 101036, + "real_time": 6.9258848736223769e-03, + "cpu_time": 6.9255131933172341e-03, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "family_index": 8, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 47859, + "real_time": 1.4628459030991201e-02, + "cpu_time": 1.4628155059654391e-02, + "time_unit": "ms" + }, + { + "name": "Buddy_Erosion2D_Constant_Padding/1", + "family_index": 9, + "per_family_instance_index": 0, + "run_name": "Buddy_Erosion2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2611, + "real_time": 2.7920378351385045e-01, + "cpu_time": 2.7918063960168527e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Dilation2D_Constant_Padding/1", + "family_index": 10, + "per_family_instance_index": 0, + "run_name": "Buddy_Dilation2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2244, + "real_time": 2.7905655742594693e-01, + "cpu_time": 2.7904662745097980e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Opening2D_Constant_Padding/1", + "family_index": 11, + "per_family_instance_index": 0, + "run_name": "Buddy_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1236, + "real_time": 5.2211519246348281e-01, + "cpu_time": 5.2209284142394796e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Closing2D_Constant_Padding/1", + "family_index": 12, + "per_family_instance_index": 0, + "run_name": "Buddy_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1165, + "real_time": 4.4711210760947462e-01, + "cpu_time": 4.4709575622317560e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_TopHat2D_Constant_Padding/1", + "family_index": 13, + "per_family_instance_index": 0, + "run_name": "Buddy_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 715, + "real_time": 9.3931627127674078e-01, + "cpu_time": 9.3925903916084019e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_BottomHat2D_Constant_Padding/1", + "family_index": 14, + "per_family_instance_index": 0, + "run_name": "Buddy_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 723, + "real_time": 9.5567062888732424e-01, + "cpu_time": 9.5561843983402406e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Erode2D_Constant_Padding/1", + "family_index": 15, + "per_family_instance_index": 0, + "run_name": "OpenCV_Erode2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4917, + "real_time": 1.4225059871341922e-01, + "cpu_time": 1.4224726479560690e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Opening2D_Constant_Padding/1", + "family_index": 16, + "per_family_instance_index": 0, + "run_name": "OpenCV_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2994, + "real_time": 2.3417615903443151e-01, + "cpu_time": 2.3416278323313275e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Closing2D_Constant_Padding/1", + "family_index": 17, + "per_family_instance_index": 0, + "run_name": "OpenCV_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2975, + "real_time": 2.3566543179399826e-01, + "cpu_time": 2.3564990386554591e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_TopHat2D_Constant_Padding/1", + "family_index": 18, + "per_family_instance_index": 0, + "run_name": "OpenCV_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2582, + "real_time": 2.6798345968069537e-01, + "cpu_time": 2.6797476646010809e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_BottomHat2D_Constant_Padding/1", + "family_index": 19, + "per_family_instance_index": 0, + "run_name": "OpenCV_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2607, + "real_time": 2.7319310375178141e-01, + "cpu_time": 2.7318398925968451e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "family_index": 20, + "per_family_instance_index": 0, + "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2649, + "real_time": 2.6686968568047292e-01, + "cpu_time": 2.6686162136655345e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Dilate2D_Constant_Padding/1", + "family_index": 21, + "per_family_instance_index": 0, + "run_name": "OpenCV_Dilate2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4944, + "real_time": 1.4209808046329755e-01, + "cpu_time": 1.4209095307443359e-01, + "time_unit": "ms" + } + ] +} diff --git a/test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log b/test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log new file mode 100644 index 00000000..29391a63 --- /dev/null +++ b/test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log @@ -0,0 +1,56 @@ +2025-09-07T14:20:10+00:00 +Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 3.55, 4.41, 5.26 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 5.16 ms 5.16 ms 135 +MLIR_Conv2D/1 7.57 ms 7.57 ms 92 +Buddy_Conv2D/1 0.540 ms 0.540 ms 1284 +Buddy_Corr2D_Constant_Padding/1 0.841 ms 0.841 ms 832 +OpenCV_Filter2D_Constant_Padding/1 1.31 ms 1.31 ms 533 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.149 ms 0.149 ms 4700 +Buddy_Resize2D_Bilinear_Interpolation/1 0.276 ms 0.276 ms 2571 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 101036 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47859 +Buddy_Erosion2D_Constant_Padding/1 0.279 ms 0.279 ms 2611 +Buddy_Dilation2D_Constant_Padding/1 0.279 ms 0.279 ms 2244 +Buddy_Opening2D_Constant_Padding/1 0.522 ms 0.522 ms 1236 +Buddy_Closing2D_Constant_Padding/1 0.447 ms 0.447 ms 1165 +Buddy_TopHat2D_Constant_Padding/1 0.939 ms 0.939 ms 715 +Buddy_BottomHat2D_Constant_Padding/1 0.956 ms 0.956 ms 723 +OpenCV_Erode2D_Constant_Padding/1 0.142 ms 0.142 ms 4917 +OpenCV_Opening2D_Constant_Padding/1 0.234 ms 0.234 ms 2994 +OpenCV_Closing2D_Constant_Padding/1 0.236 ms 0.236 ms 2975 +OpenCV_TopHat2D_Constant_Padding/1 0.268 ms 0.268 ms 2582 +OpenCV_BottomHat2D_Constant_Padding/1 0.273 ms 0.273 ms 2607 +OpenCV_MorphGrad2D_Constant_Padding/1 0.267 ms 0.267 ms 2649 +OpenCV_Dilate2D_Constant_Padding/1 0.142 ms 0.142 ms 4944 +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. diff --git a/test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json new file mode 100644 index 00000000..f2f95365 --- /dev/null +++ b/test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json @@ -0,0 +1,348 @@ +{ + "context": { + "date": "2025-09-07T14:20:33+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./bin/image-processing-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [3.47266,4.33301,5.21826], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "Eigen_Convolve2D/1", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Eigen_Convolve2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 139, + "real_time": 5.0128592754439483e+00, + "cpu_time": 5.0124305971223029e+00, + "time_unit": "ms" + }, + { + "name": "MLIR_Conv2D/1", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "MLIR_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 92, + "real_time": 7.6231503373254901e+00, + "cpu_time": 7.6229129565217395e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Conv2D/1", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "Buddy_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1298, + "real_time": 5.2513539481144655e-01, + "cpu_time": 5.2512819799691823e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Corr2D_Constant_Padding/1", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "Buddy_Corr2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 840, + "real_time": 8.3748531926955494e-01, + "cpu_time": 8.3737668214285732e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Filter2D_Constant_Padding/1", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "OpenCV_Filter2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 532, + "real_time": 1.3121627901393669e+00, + "cpu_time": 1.3119710808270681e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 5, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4720, + "real_time": 1.4909265436610933e-01, + "cpu_time": 1.4907397690677973e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2565, + "real_time": 2.7453704332282902e-01, + "cpu_time": 2.7450112709551661e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 7, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 100545, + "real_time": 6.9568862453947087e-03, + "cpu_time": 6.9558761649012911e-03, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "family_index": 8, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 47892, + "real_time": 1.4604156723145325e-02, + "cpu_time": 1.4601928923410997e-02, + "time_unit": "ms" + }, + { + "name": "Buddy_Erosion2D_Constant_Padding/1", + "family_index": 9, + "per_family_instance_index": 0, + "run_name": "Buddy_Erosion2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2439, + "real_time": 2.9473153659680573e-01, + "cpu_time": 2.9469328085280849e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Dilation2D_Constant_Padding/1", + "family_index": 10, + "per_family_instance_index": 0, + "run_name": "Buddy_Dilation2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2168, + "real_time": 2.8830999164838633e-01, + "cpu_time": 2.8826715083025839e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Opening2D_Constant_Padding/1", + "family_index": 11, + "per_family_instance_index": 0, + "run_name": "Buddy_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1168, + "real_time": 4.9684155566541299e-01, + "cpu_time": 4.9675358390410967e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Closing2D_Constant_Padding/1", + "family_index": 12, + "per_family_instance_index": 0, + "run_name": "Buddy_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1106, + "real_time": 4.7481091016024407e-01, + "cpu_time": 4.7480679113924112e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_TopHat2D_Constant_Padding/1", + "family_index": 13, + "per_family_instance_index": 0, + "run_name": "Buddy_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 689, + "real_time": 9.6393543659170067e-01, + "cpu_time": 9.6392819158200227e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_BottomHat2D_Constant_Padding/1", + "family_index": 14, + "per_family_instance_index": 0, + "run_name": "Buddy_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 707, + "real_time": 9.6352926364545266e-01, + "cpu_time": 9.6352213578500723e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Erode2D_Constant_Padding/1", + "family_index": 15, + "per_family_instance_index": 0, + "run_name": "OpenCV_Erode2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4934, + "real_time": 1.4176106599114616e-01, + "cpu_time": 1.4175841284961477e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Opening2D_Constant_Padding/1", + "family_index": 16, + "per_family_instance_index": 0, + "run_name": "OpenCV_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3072, + "real_time": 2.2831910731232105e-01, + "cpu_time": 2.2831633561197898e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Closing2D_Constant_Padding/1", + "family_index": 17, + "per_family_instance_index": 0, + "run_name": "OpenCV_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3104, + "real_time": 2.2609134987188675e-01, + "cpu_time": 2.2608958762886630e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_TopHat2D_Constant_Padding/1", + "family_index": 18, + "per_family_instance_index": 0, + "run_name": "OpenCV_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2646, + "real_time": 2.6597667595291352e-01, + "cpu_time": 2.6597261753590368e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_BottomHat2D_Constant_Padding/1", + "family_index": 19, + "per_family_instance_index": 0, + "run_name": "OpenCV_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2601, + "real_time": 2.6558996427063758e-01, + "cpu_time": 2.6558628604382972e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "family_index": 20, + "per_family_instance_index": 0, + "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2659, + "real_time": 2.6361091008487614e-01, + "cpu_time": 2.6360892515983414e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Dilate2D_Constant_Padding/1", + "family_index": 21, + "per_family_instance_index": 0, + "run_name": "OpenCV_Dilate2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4871, + "real_time": 1.4462083764718411e-01, + "cpu_time": 1.4461981831246187e-01, + "time_unit": "ms" + } + ] +} diff --git a/test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log b/test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log new file mode 100644 index 00000000..136e2000 --- /dev/null +++ b/test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log @@ -0,0 +1,56 @@ +2025-09-07T14:20:33+00:00 +Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 3.47, 4.33, 5.22 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 5.01 ms 5.01 ms 139 +MLIR_Conv2D/1 7.62 ms 7.62 ms 92 +Buddy_Conv2D/1 0.525 ms 0.525 ms 1298 +Buddy_Corr2D_Constant_Padding/1 0.837 ms 0.837 ms 840 +OpenCV_Filter2D_Constant_Padding/1 1.31 ms 1.31 ms 532 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.149 ms 0.149 ms 4720 +Buddy_Resize2D_Bilinear_Interpolation/1 0.275 ms 0.275 ms 2565 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 100545 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47892 +Buddy_Erosion2D_Constant_Padding/1 0.295 ms 0.295 ms 2439 +Buddy_Dilation2D_Constant_Padding/1 0.288 ms 0.288 ms 2168 +Buddy_Opening2D_Constant_Padding/1 0.497 ms 0.497 ms 1168 +Buddy_Closing2D_Constant_Padding/1 0.475 ms 0.475 ms 1106 +Buddy_TopHat2D_Constant_Padding/1 0.964 ms 0.964 ms 689 +Buddy_BottomHat2D_Constant_Padding/1 0.964 ms 0.964 ms 707 +OpenCV_Erode2D_Constant_Padding/1 0.142 ms 0.142 ms 4934 +OpenCV_Opening2D_Constant_Padding/1 0.228 ms 0.228 ms 3072 +OpenCV_Closing2D_Constant_Padding/1 0.226 ms 0.226 ms 3104 +OpenCV_TopHat2D_Constant_Padding/1 0.266 ms 0.266 ms 2646 +OpenCV_BottomHat2D_Constant_Padding/1 0.266 ms 0.266 ms 2601 +OpenCV_MorphGrad2D_Constant_Padding/1 0.264 ms 0.264 ms 2659 +OpenCV_Dilate2D_Constant_Padding/1 0.145 ms 0.145 ms 4871 +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. diff --git a/test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json new file mode 100644 index 00000000..2731abdb --- /dev/null +++ b/test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json @@ -0,0 +1,348 @@ +{ + "context": { + "date": "2025-09-07T14:20:55+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./bin/image-processing-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [3.31006,4.22461,5.1582], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "Eigen_Convolve2D/1", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Eigen_Convolve2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 57, + "real_time": 1.2186227660430106e+01, + "cpu_time": 1.2185617228070177e+01, + "time_unit": "ms" + }, + { + "name": "MLIR_Conv2D/1", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "MLIR_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 23, + "real_time": 3.0569540579681810e+01, + "cpu_time": 3.0567936217391306e+01, + "time_unit": "ms" + }, + { + "name": "Buddy_Conv2D/1", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "Buddy_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 212, + "real_time": 3.2612147899168842e+00, + "cpu_time": 3.2610398018867928e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Corr2D_Constant_Padding/1", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "Buddy_Corr2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 287, + "real_time": 2.4475226055454296e+00, + "cpu_time": 2.4473893937282227e+00, + "time_unit": "ms" + }, + { + "name": "OpenCV_Filter2D_Constant_Padding/1", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "OpenCV_Filter2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 162, + "real_time": 4.3507855339550678e+00, + "cpu_time": 4.3502307716049353e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 5, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4702, + "real_time": 1.4982172392977394e-01, + "cpu_time": 1.4980982752020414e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2551, + "real_time": 2.7482590690307734e-01, + "cpu_time": 2.7479107918463358e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 7, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 101196, + "real_time": 6.9294706260273542e-03, + "cpu_time": 6.9288548559231639e-03, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "family_index": 8, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 47024, + "real_time": 1.4854396933289805e-02, + "cpu_time": 1.4854171763354861e-02, + "time_unit": "ms" + }, + { + "name": "Buddy_Erosion2D_Constant_Padding/1", + "family_index": 9, + "per_family_instance_index": 0, + "run_name": "Buddy_Erosion2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2302, + "real_time": 3.2017572251327342e-01, + "cpu_time": 3.2016908123371007e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Dilation2D_Constant_Padding/1", + "family_index": 10, + "per_family_instance_index": 0, + "run_name": "Buddy_Dilation2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2501, + "real_time": 3.0623350749726963e-01, + "cpu_time": 3.0622376009596158e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Opening2D_Constant_Padding/1", + "family_index": 11, + "per_family_instance_index": 0, + "run_name": "Buddy_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1120, + "real_time": 5.8248242296810659e-01, + "cpu_time": 5.8245296071428587e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Closing2D_Constant_Padding/1", + "family_index": 12, + "per_family_instance_index": 0, + "run_name": "Buddy_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1000, + "real_time": 5.7962449267506599e-01, + "cpu_time": 5.7961194700000007e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_TopHat2D_Constant_Padding/1", + "family_index": 13, + "per_family_instance_index": 0, + "run_name": "Buddy_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 745, + "real_time": 9.4148450649824722e-01, + "cpu_time": 9.4141236644295290e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_BottomHat2D_Constant_Padding/1", + "family_index": 14, + "per_family_instance_index": 0, + "run_name": "Buddy_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 714, + "real_time": 9.2218336792720135e-01, + "cpu_time": 9.2215487114845818e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Erode2D_Constant_Padding/1", + "family_index": 15, + "per_family_instance_index": 0, + "run_name": "OpenCV_Erode2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4928, + "real_time": 1.4172616798745155e-01, + "cpu_time": 1.4172150892857185e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Opening2D_Constant_Padding/1", + "family_index": 16, + "per_family_instance_index": 0, + "run_name": "OpenCV_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3029, + "real_time": 2.3071269989604012e-01, + "cpu_time": 2.3070404060746205e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Closing2D_Constant_Padding/1", + "family_index": 17, + "per_family_instance_index": 0, + "run_name": "OpenCV_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3021, + "real_time": 2.3122206552263050e-01, + "cpu_time": 2.3121828334988390e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_TopHat2D_Constant_Padding/1", + "family_index": 18, + "per_family_instance_index": 0, + "run_name": "OpenCV_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2590, + "real_time": 2.7057007674314804e-01, + "cpu_time": 2.7055772625482621e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_BottomHat2D_Constant_Padding/1", + "family_index": 19, + "per_family_instance_index": 0, + "run_name": "OpenCV_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2602, + "real_time": 2.6801066311344013e-01, + "cpu_time": 2.6800748847040845e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "family_index": 20, + "per_family_instance_index": 0, + "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2621, + "real_time": 2.6615193267898313e-01, + "cpu_time": 2.6614443151468892e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Dilate2D_Constant_Padding/1", + "family_index": 21, + "per_family_instance_index": 0, + "run_name": "OpenCV_Dilate2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4881, + "real_time": 1.4305517117133568e-01, + "cpu_time": 1.4305138004507262e-01, + "time_unit": "ms" + } + ] +} diff --git a/test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log b/test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log new file mode 100644 index 00000000..ff61fa5e --- /dev/null +++ b/test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log @@ -0,0 +1,56 @@ +2025-09-07T14:20:55+00:00 +Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 3.31, 4.22, 5.16 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 12.2 ms 12.2 ms 57 +MLIR_Conv2D/1 30.6 ms 30.6 ms 23 +Buddy_Conv2D/1 3.26 ms 3.26 ms 212 +Buddy_Corr2D_Constant_Padding/1 2.45 ms 2.45 ms 287 +OpenCV_Filter2D_Constant_Padding/1 4.35 ms 4.35 ms 162 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.150 ms 0.150 ms 4702 +Buddy_Resize2D_Bilinear_Interpolation/1 0.275 ms 0.275 ms 2551 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 101196 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47024 +Buddy_Erosion2D_Constant_Padding/1 0.320 ms 0.320 ms 2302 +Buddy_Dilation2D_Constant_Padding/1 0.306 ms 0.306 ms 2501 +Buddy_Opening2D_Constant_Padding/1 0.582 ms 0.582 ms 1120 +Buddy_Closing2D_Constant_Padding/1 0.580 ms 0.580 ms 1000 +Buddy_TopHat2D_Constant_Padding/1 0.941 ms 0.941 ms 745 +Buddy_BottomHat2D_Constant_Padding/1 0.922 ms 0.922 ms 714 +OpenCV_Erode2D_Constant_Padding/1 0.142 ms 0.142 ms 4928 +OpenCV_Opening2D_Constant_Padding/1 0.231 ms 0.231 ms 3029 +OpenCV_Closing2D_Constant_Padding/1 0.231 ms 0.231 ms 3021 +OpenCV_TopHat2D_Constant_Padding/1 0.271 ms 0.271 ms 2590 +OpenCV_BottomHat2D_Constant_Padding/1 0.268 ms 0.268 ms 2602 +OpenCV_MorphGrad2D_Constant_Padding/1 0.266 ms 0.266 ms 2621 +OpenCV_Dilate2D_Constant_Padding/1 0.143 ms 0.143 ms 4881 +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. diff --git a/test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json new file mode 100644 index 00000000..0c01eb95 --- /dev/null +++ b/test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json @@ -0,0 +1,348 @@ +{ + "context": { + "date": "2025-09-07T14:21:20+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./bin/image-processing-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [3.26123,4.14062,5.10498], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "Eigen_Convolve2D/1", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Eigen_Convolve2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 58, + "real_time": 1.2124883910191469e+01, + "cpu_time": 1.2124201741379311e+01, + "time_unit": "ms" + }, + { + "name": "MLIR_Conv2D/1", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "MLIR_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 23, + "real_time": 3.0509588349124659e+01, + "cpu_time": 3.0508405608695647e+01, + "time_unit": "ms" + }, + { + "name": "Buddy_Conv2D/1", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "Buddy_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 210, + "real_time": 3.3209008652539480e+00, + "cpu_time": 3.3204979476190473e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Corr2D_Constant_Padding/1", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "Buddy_Corr2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 284, + "real_time": 2.4634649268758131e+00, + "cpu_time": 2.4633182781690137e+00, + "time_unit": "ms" + }, + { + "name": "OpenCV_Filter2D_Constant_Padding/1", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "OpenCV_Filter2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 163, + "real_time": 4.3028585064264897e+00, + "cpu_time": 4.3025189877300605e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 5, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4709, + "real_time": 1.4914729984278990e-01, + "cpu_time": 1.4913447419834347e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2576, + "real_time": 2.7474065843437401e-01, + "cpu_time": 2.7471746855590068e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 7, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 101120, + "real_time": 6.9243410864514828e-03, + "cpu_time": 6.9239005241297502e-03, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "family_index": 8, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 47365, + "real_time": 1.4754662157099186e-02, + "cpu_time": 1.4753382729863838e-02, + "time_unit": "ms" + }, + { + "name": "Buddy_Erosion2D_Constant_Padding/1", + "family_index": 9, + "per_family_instance_index": 0, + "run_name": "Buddy_Erosion2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2236, + "real_time": 3.0625470928941090e-01, + "cpu_time": 3.0622646198568843e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Dilation2D_Constant_Padding/1", + "family_index": 10, + "per_family_instance_index": 0, + "run_name": "Buddy_Dilation2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2168, + "real_time": 2.9962818306695493e-01, + "cpu_time": 2.9958592850553478e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Opening2D_Constant_Padding/1", + "family_index": 11, + "per_family_instance_index": 0, + "run_name": "Buddy_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1149, + "real_time": 5.4589507240539437e-01, + "cpu_time": 5.4582348563968719e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Closing2D_Constant_Padding/1", + "family_index": 12, + "per_family_instance_index": 0, + "run_name": "Buddy_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1158, + "real_time": 4.3890949512392746e-01, + "cpu_time": 4.3878794473229726e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_TopHat2D_Constant_Padding/1", + "family_index": 13, + "per_family_instance_index": 0, + "run_name": "Buddy_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 546, + "real_time": 9.8986280979690966e-01, + "cpu_time": 9.8963964468864618e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_BottomHat2D_Constant_Padding/1", + "family_index": 14, + "per_family_instance_index": 0, + "run_name": "Buddy_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 707, + "real_time": 9.5082524916238798e-01, + "cpu_time": 9.5070903111739746e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Erode2D_Constant_Padding/1", + "family_index": 15, + "per_family_instance_index": 0, + "run_name": "OpenCV_Erode2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4900, + "real_time": 1.4259748845076076e-01, + "cpu_time": 1.4257857632653057e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Opening2D_Constant_Padding/1", + "family_index": 16, + "per_family_instance_index": 0, + "run_name": "OpenCV_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2916, + "real_time": 2.3652738105858304e-01, + "cpu_time": 2.3650978943758605e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Closing2D_Constant_Padding/1", + "family_index": 17, + "per_family_instance_index": 0, + "run_name": "OpenCV_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2906, + "real_time": 2.4044473193385890e-01, + "cpu_time": 2.4042902030282134e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_TopHat2D_Constant_Padding/1", + "family_index": 18, + "per_family_instance_index": 0, + "run_name": "OpenCV_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2393, + "real_time": 2.8040418241794168e-01, + "cpu_time": 2.8036821521103100e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_BottomHat2D_Constant_Padding/1", + "family_index": 19, + "per_family_instance_index": 0, + "run_name": "OpenCV_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2495, + "real_time": 2.7553062579913701e-01, + "cpu_time": 2.7551413947895820e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "family_index": 20, + "per_family_instance_index": 0, + "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2554, + "real_time": 2.7534571463882501e-01, + "cpu_time": 2.7532446162881763e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Dilate2D_Constant_Padding/1", + "family_index": 21, + "per_family_instance_index": 0, + "run_name": "OpenCV_Dilate2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4820, + "real_time": 1.4465537957878033e-01, + "cpu_time": 1.4464847676348577e-01, + "time_unit": "ms" + } + ] +} diff --git a/test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log b/test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log new file mode 100644 index 00000000..b98f2736 --- /dev/null +++ b/test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log @@ -0,0 +1,56 @@ +2025-09-07T14:21:20+00:00 +Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 3.26, 4.14, 5.10 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 12.1 ms 12.1 ms 58 +MLIR_Conv2D/1 30.5 ms 30.5 ms 23 +Buddy_Conv2D/1 3.32 ms 3.32 ms 210 +Buddy_Corr2D_Constant_Padding/1 2.46 ms 2.46 ms 284 +OpenCV_Filter2D_Constant_Padding/1 4.30 ms 4.30 ms 163 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.149 ms 0.149 ms 4709 +Buddy_Resize2D_Bilinear_Interpolation/1 0.275 ms 0.275 ms 2576 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 101120 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47365 +Buddy_Erosion2D_Constant_Padding/1 0.306 ms 0.306 ms 2236 +Buddy_Dilation2D_Constant_Padding/1 0.300 ms 0.300 ms 2168 +Buddy_Opening2D_Constant_Padding/1 0.546 ms 0.546 ms 1149 +Buddy_Closing2D_Constant_Padding/1 0.439 ms 0.439 ms 1158 +Buddy_TopHat2D_Constant_Padding/1 0.990 ms 0.990 ms 546 +Buddy_BottomHat2D_Constant_Padding/1 0.951 ms 0.951 ms 707 +OpenCV_Erode2D_Constant_Padding/1 0.143 ms 0.143 ms 4900 +OpenCV_Opening2D_Constant_Padding/1 0.237 ms 0.237 ms 2916 +OpenCV_Closing2D_Constant_Padding/1 0.240 ms 0.240 ms 2906 +OpenCV_TopHat2D_Constant_Padding/1 0.280 ms 0.280 ms 2393 +OpenCV_BottomHat2D_Constant_Padding/1 0.276 ms 0.276 ms 2495 +OpenCV_MorphGrad2D_Constant_Padding/1 0.275 ms 0.275 ms 2554 +OpenCV_Dilate2D_Constant_Padding/1 0.145 ms 0.145 ms 4820 +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. diff --git a/test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json new file mode 100644 index 00000000..b48f7511 --- /dev/null +++ b/test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json @@ -0,0 +1,348 @@ +{ + "context": { + "date": "2025-09-07T14:21:44+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./bin/image-processing-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [3.1709,4.04785,5.04785], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "Eigen_Convolve2D/1", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Eigen_Convolve2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 31, + "real_time": 2.2453694574294552e+01, + "cpu_time": 2.2453529516129038e+01, + "time_unit": "ms" + }, + { + "name": "MLIR_Conv2D/1", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "MLIR_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 10, + "real_time": 6.9538136571645737e+01, + "cpu_time": 6.9535859299999998e+01, + "time_unit": "ms" + }, + { + "name": "Buddy_Conv2D/1", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "Buddy_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 105, + "real_time": 6.5831756662754781e+00, + "cpu_time": 6.5830738380952374e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Corr2D_Constant_Padding/1", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "Buddy_Corr2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 145, + "real_time": 4.8232713906929412e+00, + "cpu_time": 4.8231763103448246e+00, + "time_unit": "ms" + }, + { + "name": "OpenCV_Filter2D_Constant_Padding/1", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "OpenCV_Filter2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 78, + "real_time": 8.9926493760102826e+00, + "cpu_time": 8.9925164102564121e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 5, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4719, + "real_time": 1.4828112111376565e-01, + "cpu_time": 1.4827689298580210e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2578, + "real_time": 2.7298830056532675e-01, + "cpu_time": 2.7298321722265312e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 7, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 101158, + "real_time": 6.9221397090508292e-03, + "cpu_time": 6.9218924257102776e-03, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "family_index": 8, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 47881, + "real_time": 1.4618278297898276e-02, + "cpu_time": 1.4618093627952623e-02, + "time_unit": "ms" + }, + { + "name": "Buddy_Erosion2D_Constant_Padding/1", + "family_index": 9, + "per_family_instance_index": 0, + "run_name": "Buddy_Erosion2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2555, + "real_time": 2.8827243325994906e-01, + "cpu_time": 2.8826175381604680e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Dilation2D_Constant_Padding/1", + "family_index": 10, + "per_family_instance_index": 0, + "run_name": "Buddy_Dilation2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2332, + "real_time": 2.7560231980338923e-01, + "cpu_time": 2.7559355960548881e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Opening2D_Constant_Padding/1", + "family_index": 11, + "per_family_instance_index": 0, + "run_name": "Buddy_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1208, + "real_time": 5.0620788042217690e-01, + "cpu_time": 5.0618569039735117e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Closing2D_Constant_Padding/1", + "family_index": 12, + "per_family_instance_index": 0, + "run_name": "Buddy_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1084, + "real_time": 5.1128306182108241e-01, + "cpu_time": 5.1127205996309932e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_TopHat2D_Constant_Padding/1", + "family_index": 13, + "per_family_instance_index": 0, + "run_name": "Buddy_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 712, + "real_time": 9.4518835101737064e-01, + "cpu_time": 9.4518094662921193e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_BottomHat2D_Constant_Padding/1", + "family_index": 14, + "per_family_instance_index": 0, + "run_name": "Buddy_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 712, + "real_time": 9.4133523407946806e-01, + "cpu_time": 9.4132780337078559e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Erode2D_Constant_Padding/1", + "family_index": 15, + "per_family_instance_index": 0, + "run_name": "OpenCV_Erode2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4962, + "real_time": 1.4084929968983068e-01, + "cpu_time": 1.4084782728738404e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Opening2D_Constant_Padding/1", + "family_index": 16, + "per_family_instance_index": 0, + "run_name": "OpenCV_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2964, + "real_time": 2.3630762148482598e-01, + "cpu_time": 2.3630250944669354e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Closing2D_Constant_Padding/1", + "family_index": 17, + "per_family_instance_index": 0, + "run_name": "OpenCV_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2964, + "real_time": 2.3484633740304131e-01, + "cpu_time": 2.3484455229419723e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_TopHat2D_Constant_Padding/1", + "family_index": 18, + "per_family_instance_index": 0, + "run_name": "OpenCV_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2567, + "real_time": 2.7368364135114703e-01, + "cpu_time": 2.7367765095442176e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_BottomHat2D_Constant_Padding/1", + "family_index": 19, + "per_family_instance_index": 0, + "run_name": "OpenCV_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2585, + "real_time": 2.7532173425132117e-01, + "cpu_time": 2.7531788704061916e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "family_index": 20, + "per_family_instance_index": 0, + "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2623, + "real_time": 2.6751557039114704e-01, + "cpu_time": 2.6751091498284435e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Dilate2D_Constant_Padding/1", + "family_index": 21, + "per_family_instance_index": 0, + "run_name": "OpenCV_Dilate2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4903, + "real_time": 1.4305556644694115e-01, + "cpu_time": 1.4305444992861513e-01, + "time_unit": "ms" + } + ] +} diff --git a/test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log b/test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log new file mode 100644 index 00000000..eada4cc4 --- /dev/null +++ b/test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log @@ -0,0 +1,56 @@ +2025-09-07T14:21:44+00:00 +Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 3.17, 4.05, 5.05 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 22.5 ms 22.5 ms 31 +MLIR_Conv2D/1 69.5 ms 69.5 ms 10 +Buddy_Conv2D/1 6.58 ms 6.58 ms 105 +Buddy_Corr2D_Constant_Padding/1 4.82 ms 4.82 ms 145 +OpenCV_Filter2D_Constant_Padding/1 8.99 ms 8.99 ms 78 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.148 ms 0.148 ms 4719 +Buddy_Resize2D_Bilinear_Interpolation/1 0.273 ms 0.273 ms 2578 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 101158 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47881 +Buddy_Erosion2D_Constant_Padding/1 0.288 ms 0.288 ms 2555 +Buddy_Dilation2D_Constant_Padding/1 0.276 ms 0.276 ms 2332 +Buddy_Opening2D_Constant_Padding/1 0.506 ms 0.506 ms 1208 +Buddy_Closing2D_Constant_Padding/1 0.511 ms 0.511 ms 1084 +Buddy_TopHat2D_Constant_Padding/1 0.945 ms 0.945 ms 712 +Buddy_BottomHat2D_Constant_Padding/1 0.941 ms 0.941 ms 712 +OpenCV_Erode2D_Constant_Padding/1 0.141 ms 0.141 ms 4962 +OpenCV_Opening2D_Constant_Padding/1 0.236 ms 0.236 ms 2964 +OpenCV_Closing2D_Constant_Padding/1 0.235 ms 0.235 ms 2964 +OpenCV_TopHat2D_Constant_Padding/1 0.274 ms 0.274 ms 2567 +OpenCV_BottomHat2D_Constant_Padding/1 0.275 ms 0.275 ms 2585 +OpenCV_MorphGrad2D_Constant_Padding/1 0.268 ms 0.268 ms 2623 +OpenCV_Dilate2D_Constant_Padding/1 0.143 ms 0.143 ms 4903 +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. diff --git a/test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json new file mode 100644 index 00000000..6a4e41aa --- /dev/null +++ b/test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json @@ -0,0 +1,348 @@ +{ + "context": { + "date": "2025-09-07T14:22:08+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./bin/image-processing-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [3.12158,3.979,5.00293], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "Eigen_Convolve2D/1", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Eigen_Convolve2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 31, + "real_time": 2.2618762908443326e+01, + "cpu_time": 2.2618371935483868e+01, + "time_unit": "ms" + }, + { + "name": "MLIR_Conv2D/1", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "MLIR_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 10, + "real_time": 6.9524862989783287e+01, + "cpu_time": 6.9523626999999991e+01, + "time_unit": "ms" + }, + { + "name": "Buddy_Conv2D/1", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "Buddy_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 106, + "real_time": 6.5605414433861675e+00, + "cpu_time": 6.5604865094339591e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Corr2D_Constant_Padding/1", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "Buddy_Corr2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 145, + "real_time": 4.8053482226256667e+00, + "cpu_time": 4.8052233655172412e+00, + "time_unit": "ms" + }, + { + "name": "OpenCV_Filter2D_Constant_Padding/1", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "OpenCV_Filter2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 77, + "real_time": 8.9911930263042450e+00, + "cpu_time": 8.9911225194805233e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 5, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4701, + "real_time": 1.4843348191028402e-01, + "cpu_time": 1.4843221718783239e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2581, + "real_time": 2.7286108469187126e-01, + "cpu_time": 2.7285397791553656e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 7, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 100977, + "real_time": 6.9243114111851491e-03, + "cpu_time": 6.9242313695197901e-03, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "family_index": 8, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 47816, + "real_time": 1.4641848158278912e-02, + "cpu_time": 1.4641626714907138e-02, + "time_unit": "ms" + }, + { + "name": "Buddy_Erosion2D_Constant_Padding/1", + "family_index": 9, + "per_family_instance_index": 0, + "run_name": "Buddy_Erosion2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2338, + "real_time": 3.2029897643777427e-01, + "cpu_time": 3.2029361420017116e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Dilation2D_Constant_Padding/1", + "family_index": 10, + "per_family_instance_index": 0, + "run_name": "Buddy_Dilation2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2051, + "real_time": 3.0370511982802473e-01, + "cpu_time": 3.0369885811799063e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Opening2D_Constant_Padding/1", + "family_index": 11, + "per_family_instance_index": 0, + "run_name": "Buddy_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1078, + "real_time": 5.8860488664462529e-01, + "cpu_time": 5.8859645361780988e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Closing2D_Constant_Padding/1", + "family_index": 12, + "per_family_instance_index": 0, + "run_name": "Buddy_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1041, + "real_time": 5.0554771645497409e-01, + "cpu_time": 5.0554403746397636e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_TopHat2D_Constant_Padding/1", + "family_index": 13, + "per_family_instance_index": 0, + "run_name": "Buddy_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 676, + "real_time": 9.8800656424295263e-01, + "cpu_time": 9.8798956065088772e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_BottomHat2D_Constant_Padding/1", + "family_index": 14, + "per_family_instance_index": 0, + "run_name": "Buddy_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 681, + "real_time": 9.9229560440157305e-01, + "cpu_time": 9.9226728928046803e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Erode2D_Constant_Padding/1", + "family_index": 15, + "per_family_instance_index": 0, + "run_name": "OpenCV_Erode2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4906, + "real_time": 1.4288953546840125e-01, + "cpu_time": 1.4288746534855290e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Opening2D_Constant_Padding/1", + "family_index": 16, + "per_family_instance_index": 0, + "run_name": "OpenCV_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2934, + "real_time": 2.3779297538289587e-01, + "cpu_time": 2.3778759066121333e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Closing2D_Constant_Padding/1", + "family_index": 17, + "per_family_instance_index": 0, + "run_name": "OpenCV_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2930, + "real_time": 2.3896080591165977e-01, + "cpu_time": 2.3895895563139949e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_TopHat2D_Constant_Padding/1", + "family_index": 18, + "per_family_instance_index": 0, + "run_name": "OpenCV_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2536, + "real_time": 2.7688780405825997e-01, + "cpu_time": 2.7688168966877036e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_BottomHat2D_Constant_Padding/1", + "family_index": 19, + "per_family_instance_index": 0, + "run_name": "OpenCV_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2520, + "real_time": 2.7749545369592926e-01, + "cpu_time": 2.7749185238095248e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "family_index": 20, + "per_family_instance_index": 0, + "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2580, + "real_time": 2.7239541126083033e-01, + "cpu_time": 2.7239121627906959e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Dilate2D_Constant_Padding/1", + "family_index": 21, + "per_family_instance_index": 0, + "run_name": "OpenCV_Dilate2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4911, + "real_time": 1.4269597732809600e-01, + "cpu_time": 1.4269491427407904e-01, + "time_unit": "ms" + } + ] +} diff --git a/test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log b/test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log new file mode 100644 index 00000000..d80f7666 --- /dev/null +++ b/test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log @@ -0,0 +1,56 @@ +2025-09-07T14:22:08+00:00 +Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 3.12, 3.98, 5.00 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 22.6 ms 22.6 ms 31 +MLIR_Conv2D/1 69.5 ms 69.5 ms 10 +Buddy_Conv2D/1 6.56 ms 6.56 ms 106 +Buddy_Corr2D_Constant_Padding/1 4.81 ms 4.81 ms 145 +OpenCV_Filter2D_Constant_Padding/1 8.99 ms 8.99 ms 77 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.148 ms 0.148 ms 4701 +Buddy_Resize2D_Bilinear_Interpolation/1 0.273 ms 0.273 ms 2581 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 100977 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47816 +Buddy_Erosion2D_Constant_Padding/1 0.320 ms 0.320 ms 2338 +Buddy_Dilation2D_Constant_Padding/1 0.304 ms 0.304 ms 2051 +Buddy_Opening2D_Constant_Padding/1 0.589 ms 0.589 ms 1078 +Buddy_Closing2D_Constant_Padding/1 0.506 ms 0.506 ms 1041 +Buddy_TopHat2D_Constant_Padding/1 0.988 ms 0.988 ms 676 +Buddy_BottomHat2D_Constant_Padding/1 0.992 ms 0.992 ms 681 +OpenCV_Erode2D_Constant_Padding/1 0.143 ms 0.143 ms 4906 +OpenCV_Opening2D_Constant_Padding/1 0.238 ms 0.238 ms 2934 +OpenCV_Closing2D_Constant_Padding/1 0.239 ms 0.239 ms 2930 +OpenCV_TopHat2D_Constant_Padding/1 0.277 ms 0.277 ms 2536 +OpenCV_BottomHat2D_Constant_Padding/1 0.277 ms 0.277 ms 2520 +OpenCV_MorphGrad2D_Constant_Padding/1 0.272 ms 0.272 ms 2580 +OpenCV_Dilate2D_Constant_Padding/1 0.143 ms 0.143 ms 4911 +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. diff --git a/test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json new file mode 100644 index 00000000..c44dc8c1 --- /dev/null +++ b/test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json @@ -0,0 +1,348 @@ +{ + "context": { + "date": "2025-09-07T14:22:31+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./bin/image-processing-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [3.0791,3.89893,4.94873], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "Eigen_Convolve2D/1", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Eigen_Convolve2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 19, + "real_time": 3.6331885739376673e+01, + "cpu_time": 3.6330795842105260e+01, + "time_unit": "ms" + }, + { + "name": "MLIR_Conv2D/1", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "MLIR_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 6, + "real_time": 1.2376248215635617e+02, + "cpu_time": 1.2375778916666665e+02, + "time_unit": "ms" + }, + { + "name": "Buddy_Conv2D/1", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "Buddy_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 61, + "real_time": 1.1424725783652947e+01, + "cpu_time": 1.1424135098360654e+01, + "time_unit": "ms" + }, + { + "name": "Buddy_Corr2D_Constant_Padding/1", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "Buddy_Corr2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 86, + "real_time": 8.1076199232145800e+00, + "cpu_time": 8.1065335465116259e+00, + "time_unit": "ms" + }, + { + "name": "OpenCV_Filter2D_Constant_Padding/1", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "OpenCV_Filter2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 116, + "real_time": 6.0680480673909187e+00, + "cpu_time": 6.0677083189655177e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 5, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4691, + "real_time": 1.4930519832651948e-01, + "cpu_time": 1.4926992773395867e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2573, + "real_time": 2.7582971932959383e-01, + "cpu_time": 2.7580277924601621e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 7, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 100634, + "real_time": 6.9968619756626393e-03, + "cpu_time": 6.9965185225669300e-03, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "family_index": 8, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 43490, + "real_time": 1.4810458127178799e-02, + "cpu_time": 1.4809136491147392e-02, + "time_unit": "ms" + }, + { + "name": "Buddy_Erosion2D_Constant_Padding/1", + "family_index": 9, + "per_family_instance_index": 0, + "run_name": "Buddy_Erosion2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2224, + "real_time": 3.1000987975020633e-01, + "cpu_time": 3.0994578911870491e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Dilation2D_Constant_Padding/1", + "family_index": 10, + "per_family_instance_index": 0, + "run_name": "Buddy_Dilation2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2187, + "real_time": 3.0433967416560098e-01, + "cpu_time": 3.0430470416095112e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Opening2D_Constant_Padding/1", + "family_index": 11, + "per_family_instance_index": 0, + "run_name": "Buddy_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1079, + "real_time": 5.5283736834601194e-01, + "cpu_time": 5.5278239110287286e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Closing2D_Constant_Padding/1", + "family_index": 12, + "per_family_instance_index": 0, + "run_name": "Buddy_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1084, + "real_time": 4.6356722740009704e-01, + "cpu_time": 4.6354099538745458e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_TopHat2D_Constant_Padding/1", + "family_index": 13, + "per_family_instance_index": 0, + "run_name": "Buddy_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 671, + "real_time": 9.9508046837393116e-01, + "cpu_time": 9.9501271982116168e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_BottomHat2D_Constant_Padding/1", + "family_index": 14, + "per_family_instance_index": 0, + "run_name": "Buddy_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 694, + "real_time": 9.6488717717980099e-01, + "cpu_time": 9.6476614553314033e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Erode2D_Constant_Padding/1", + "family_index": 15, + "per_family_instance_index": 0, + "run_name": "OpenCV_Erode2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4872, + "real_time": 1.4356119720317265e-01, + "cpu_time": 1.4355267775041070e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Opening2D_Constant_Padding/1", + "family_index": 16, + "per_family_instance_index": 0, + "run_name": "OpenCV_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3052, + "real_time": 2.3091042616928234e-01, + "cpu_time": 2.3088996100917433e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Closing2D_Constant_Padding/1", + "family_index": 17, + "per_family_instance_index": 0, + "run_name": "OpenCV_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3055, + "real_time": 2.2933373499033688e-01, + "cpu_time": 2.2932497119476267e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_TopHat2D_Constant_Padding/1", + "family_index": 18, + "per_family_instance_index": 0, + "run_name": "OpenCV_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2601, + "real_time": 2.6950421458434620e-01, + "cpu_time": 2.6948042560553648e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_BottomHat2D_Constant_Padding/1", + "family_index": 19, + "per_family_instance_index": 0, + "run_name": "OpenCV_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2655, + "real_time": 2.6802309769470589e-01, + "cpu_time": 2.6800724218455829e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "family_index": 20, + "per_family_instance_index": 0, + "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2674, + "real_time": 2.6259915979834036e-01, + "cpu_time": 2.6257767726252695e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Dilate2D_Constant_Padding/1", + "family_index": 21, + "per_family_instance_index": 0, + "run_name": "OpenCV_Dilate2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 5002, + "real_time": 1.4069610041101091e-01, + "cpu_time": 1.4068704078368685e-01, + "time_unit": "ms" + } + ] +} diff --git a/test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log b/test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log new file mode 100644 index 00000000..9d3be0fe --- /dev/null +++ b/test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log @@ -0,0 +1,56 @@ +2025-09-07T14:22:31+00:00 +Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 3.08, 3.90, 4.95 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 36.3 ms 36.3 ms 19 +MLIR_Conv2D/1 124 ms 124 ms 6 +Buddy_Conv2D/1 11.4 ms 11.4 ms 61 +Buddy_Corr2D_Constant_Padding/1 8.11 ms 8.11 ms 86 +OpenCV_Filter2D_Constant_Padding/1 6.07 ms 6.07 ms 116 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.149 ms 0.149 ms 4691 +Buddy_Resize2D_Bilinear_Interpolation/1 0.276 ms 0.276 ms 2573 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 100634 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 43490 +Buddy_Erosion2D_Constant_Padding/1 0.310 ms 0.310 ms 2224 +Buddy_Dilation2D_Constant_Padding/1 0.304 ms 0.304 ms 2187 +Buddy_Opening2D_Constant_Padding/1 0.553 ms 0.553 ms 1079 +Buddy_Closing2D_Constant_Padding/1 0.464 ms 0.464 ms 1084 +Buddy_TopHat2D_Constant_Padding/1 0.995 ms 0.995 ms 671 +Buddy_BottomHat2D_Constant_Padding/1 0.965 ms 0.965 ms 694 +OpenCV_Erode2D_Constant_Padding/1 0.144 ms 0.144 ms 4872 +OpenCV_Opening2D_Constant_Padding/1 0.231 ms 0.231 ms 3052 +OpenCV_Closing2D_Constant_Padding/1 0.229 ms 0.229 ms 3055 +OpenCV_TopHat2D_Constant_Padding/1 0.270 ms 0.269 ms 2601 +OpenCV_BottomHat2D_Constant_Padding/1 0.268 ms 0.268 ms 2655 +OpenCV_MorphGrad2D_Constant_Padding/1 0.263 ms 0.263 ms 2674 +OpenCV_Dilate2D_Constant_Padding/1 0.141 ms 0.141 ms 5002 +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. diff --git a/test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json new file mode 100644 index 00000000..84797d22 --- /dev/null +++ b/test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json @@ -0,0 +1,348 @@ +{ + "context": { + "date": "2025-09-07T14:22:55+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./bin/image-processing-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [3.05127,3.8252,4.89551], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "Eigen_Convolve2D/1", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Eigen_Convolve2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 19, + "real_time": 3.5804433257956255e+01, + "cpu_time": 3.5803924105263171e+01, + "time_unit": "ms" + }, + { + "name": "MLIR_Conv2D/1", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "MLIR_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 6, + "real_time": 1.2386105209589005e+02, + "cpu_time": 1.2385802349999996e+02, + "time_unit": "ms" + }, + { + "name": "Buddy_Conv2D/1", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "Buddy_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 62, + "real_time": 1.1243964154874124e+01, + "cpu_time": 1.1243781112903225e+01, + "time_unit": "ms" + }, + { + "name": "Buddy_Corr2D_Constant_Padding/1", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "Buddy_Corr2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 86, + "real_time": 8.1609252320472585e+00, + "cpu_time": 8.1606621744186025e+00, + "time_unit": "ms" + }, + { + "name": "OpenCV_Filter2D_Constant_Padding/1", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "OpenCV_Filter2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 115, + "real_time": 6.0534707230070364e+00, + "cpu_time": 6.0533277826086938e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 5, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4691, + "real_time": 1.4979387287060808e-01, + "cpu_time": 1.4979121743764645e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2560, + "real_time": 2.7500972501002252e-01, + "cpu_time": 2.7500316249999995e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 7, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 100967, + "real_time": 6.9533759701305943e-03, + "cpu_time": 6.9528367090237403e-03, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "family_index": 8, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 47401, + "real_time": 1.4758331940115305e-02, + "cpu_time": 1.4757342018100885e-02, + "time_unit": "ms" + }, + { + "name": "Buddy_Erosion2D_Constant_Padding/1", + "family_index": 9, + "per_family_instance_index": 0, + "run_name": "Buddy_Erosion2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2397, + "real_time": 3.0549748811962507e-01, + "cpu_time": 3.0548149436796035e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Dilation2D_Constant_Padding/1", + "family_index": 10, + "per_family_instance_index": 0, + "run_name": "Buddy_Dilation2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2198, + "real_time": 2.8026699375791697e-01, + "cpu_time": 2.8025402229299401e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Opening2D_Constant_Padding/1", + "family_index": 11, + "per_family_instance_index": 0, + "run_name": "Buddy_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1071, + "real_time": 5.2040776743942285e-01, + "cpu_time": 5.2038497292250174e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Closing2D_Constant_Padding/1", + "family_index": 12, + "per_family_instance_index": 0, + "run_name": "Buddy_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1510, + "real_time": 3.8458729865929936e-01, + "cpu_time": 3.8456946887417259e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_TopHat2D_Constant_Padding/1", + "family_index": 13, + "per_family_instance_index": 0, + "run_name": "Buddy_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 695, + "real_time": 9.8917052792988236e-01, + "cpu_time": 9.8913716402877794e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_BottomHat2D_Constant_Padding/1", + "family_index": 14, + "per_family_instance_index": 0, + "run_name": "Buddy_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 684, + "real_time": 9.6726054815869578e-01, + "cpu_time": 9.6718889619882953e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Erode2D_Constant_Padding/1", + "family_index": 15, + "per_family_instance_index": 0, + "run_name": "OpenCV_Erode2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4878, + "real_time": 1.4255527756701927e-01, + "cpu_time": 1.4255166113161119e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Opening2D_Constant_Padding/1", + "family_index": 16, + "per_family_instance_index": 0, + "run_name": "OpenCV_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3098, + "real_time": 2.2962313864483536e-01, + "cpu_time": 2.2960967947062669e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Closing2D_Constant_Padding/1", + "family_index": 17, + "per_family_instance_index": 0, + "run_name": "OpenCV_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3043, + "real_time": 2.3499211144024237e-01, + "cpu_time": 2.3498555570160989e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_TopHat2D_Constant_Padding/1", + "family_index": 18, + "per_family_instance_index": 0, + "run_name": "OpenCV_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2619, + "real_time": 2.6389677824296764e-01, + "cpu_time": 2.6389314623902327e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_BottomHat2D_Constant_Padding/1", + "family_index": 19, + "per_family_instance_index": 0, + "run_name": "OpenCV_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2682, + "real_time": 2.5938538102451142e-01, + "cpu_time": 2.5937977852349003e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "family_index": 20, + "per_family_instance_index": 0, + "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2671, + "real_time": 2.6242634423079897e-01, + "cpu_time": 2.6242075926619268e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Dilate2D_Constant_Padding/1", + "family_index": 21, + "per_family_instance_index": 0, + "run_name": "OpenCV_Dilate2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4966, + "real_time": 1.4111212860947409e-01, + "cpu_time": 1.4111019734192456e-01, + "time_unit": "ms" + } + ] +} diff --git a/test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log b/test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log new file mode 100644 index 00000000..cf470496 --- /dev/null +++ b/test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log @@ -0,0 +1,56 @@ +2025-09-07T14:22:55+00:00 +Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 3.05, 3.83, 4.90 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 35.8 ms 35.8 ms 19 +MLIR_Conv2D/1 124 ms 124 ms 6 +Buddy_Conv2D/1 11.2 ms 11.2 ms 62 +Buddy_Corr2D_Constant_Padding/1 8.16 ms 8.16 ms 86 +OpenCV_Filter2D_Constant_Padding/1 6.05 ms 6.05 ms 115 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.150 ms 0.150 ms 4691 +Buddy_Resize2D_Bilinear_Interpolation/1 0.275 ms 0.275 ms 2560 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 100967 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47401 +Buddy_Erosion2D_Constant_Padding/1 0.305 ms 0.305 ms 2397 +Buddy_Dilation2D_Constant_Padding/1 0.280 ms 0.280 ms 2198 +Buddy_Opening2D_Constant_Padding/1 0.520 ms 0.520 ms 1071 +Buddy_Closing2D_Constant_Padding/1 0.385 ms 0.385 ms 1510 +Buddy_TopHat2D_Constant_Padding/1 0.989 ms 0.989 ms 695 +Buddy_BottomHat2D_Constant_Padding/1 0.967 ms 0.967 ms 684 +OpenCV_Erode2D_Constant_Padding/1 0.143 ms 0.143 ms 4878 +OpenCV_Opening2D_Constant_Padding/1 0.230 ms 0.230 ms 3098 +OpenCV_Closing2D_Constant_Padding/1 0.235 ms 0.235 ms 3043 +OpenCV_TopHat2D_Constant_Padding/1 0.264 ms 0.264 ms 2619 +OpenCV_BottomHat2D_Constant_Padding/1 0.259 ms 0.259 ms 2682 +OpenCV_MorphGrad2D_Constant_Padding/1 0.262 ms 0.262 ms 2671 +OpenCV_Dilate2D_Constant_Padding/1 0.141 ms 0.141 ms 4966 +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. diff --git a/test_result/imageprocessing/image-processing-result.log b/test_result/imageprocessing/image-processing-result.log new file mode 100644 index 00000000..8b8610c9 --- /dev/null +++ b/test_result/imageprocessing/image-processing-result.log @@ -0,0 +1,129 @@ +Benchmark results - Sun Sep 7 14:19:23 UTC 2025 +Testing SSE support +SSE is supported. +Running image-processing-benchmark for SSE +Running: ../benchmarks/ImageProcessing/Images/YuTu.png prewittKernelAlign random3x3KernelAlignInt CONSTANT_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png prewittKernelAlign random3x3KernelAlignInt REPLICATE_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png prewittKernelAlign random3x3KernelAlignInt REPLICATE_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel3x3KernelAlign random3x3KernelAlignInt CONSTANT_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel3x3KernelAlign random3x3KernelAlignInt CONSTANT_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel3x3KernelAlign random3x3KernelAlignInt REPLICATE_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel3x3KernelAlign random3x3KernelAlignInt REPLICATE_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel5x5KernelAlign random3x3KernelAlignInt CONSTANT_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel5x5KernelAlign random3x3KernelAlignInt CONSTANT_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel5x5KernelAlign random3x3KernelAlignInt REPLICATE_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel5x5KernelAlign random3x3KernelAlignInt REPLICATE_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel7x7KernelAlign random3x3KernelAlignInt CONSTANT_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel7x7KernelAlign random3x3KernelAlignInt CONSTANT_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel7x7KernelAlign random3x3KernelAlignInt REPLICATE_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel7x7KernelAlign random3x3KernelAlignInt REPLICATE_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel9x9KernelAlign random3x3KernelAlignInt CONSTANT_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel9x9KernelAlign random3x3KernelAlignInt CONSTANT_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel9x9KernelAlign random3x3KernelAlignInt REPLICATE_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel9x9KernelAlign random3x3KernelAlignInt REPLICATE_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png laplacianKernelAlign random3x3KernelAlignInt CONSTANT_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png laplacianKernelAlign random3x3KernelAlignInt CONSTANT_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png laplacianKernelAlign random3x3KernelAlignInt REPLICATE_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png laplacianKernelAlign random3x3KernelAlignInt REPLICATE_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png logKernelAlign random3x3KernelAlignInt CONSTANT_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png logKernelAlign random3x3KernelAlignInt CONSTANT_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png logKernelAlign random3x3KernelAlignInt REPLICATE_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png logKernelAlign random3x3KernelAlignInt REPLICATE_PADDING +[Success] … +Testing AVX2 support +AVX2 is supported. +[Success] … +Testing AVX2 support +AVX2 is supported. +Running image-processing-benchmark for AVX2 +Running: ../benchmarks/ImageProcessing/Images/YuTu.png prewittKernelAlign random3x3KernelAlignInt CONSTANT_PADDING +Running image-processing-benchmark for AVX2 +Running: ../benchmarks/ImageProcessing/Images/YuTu.png prewittKernelAlign random3x3KernelAlignInt CONSTANT_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png prewittKernelAlign random3x3KernelAlignInt REPLICATE_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png prewittKernelAlign random3x3KernelAlignInt REPLICATE_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel3x3KernelAlign random3x3KernelAlignInt CONSTANT_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel3x3KernelAlign random3x3KernelAlignInt CONSTANT_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel3x3KernelAlign random3x3KernelAlignInt REPLICATE_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel3x3KernelAlign random3x3KernelAlignInt REPLICATE_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel5x5KernelAlign random3x3KernelAlignInt CONSTANT_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel5x5KernelAlign random3x3KernelAlignInt CONSTANT_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel5x5KernelAlign random3x3KernelAlignInt REPLICATE_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel5x5KernelAlign random3x3KernelAlignInt REPLICATE_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel7x7KernelAlign random3x3KernelAlignInt CONSTANT_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel7x7KernelAlign random3x3KernelAlignInt CONSTANT_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel7x7KernelAlign random3x3KernelAlignInt REPLICATE_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel7x7KernelAlign random3x3KernelAlignInt REPLICATE_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel9x9KernelAlign random3x3KernelAlignInt CONSTANT_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel9x9KernelAlign random3x3KernelAlignInt CONSTANT_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel9x9KernelAlign random3x3KernelAlignInt REPLICATE_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel9x9KernelAlign random3x3KernelAlignInt REPLICATE_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png laplacianKernelAlign random3x3KernelAlignInt CONSTANT_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png laplacianKernelAlign random3x3KernelAlignInt CONSTANT_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png laplacianKernelAlign random3x3KernelAlignInt REPLICATE_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png laplacianKernelAlign random3x3KernelAlignInt REPLICATE_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png logKernelAlign random3x3KernelAlignInt CONSTANT_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png logKernelAlign random3x3KernelAlignInt CONSTANT_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png logKernelAlign random3x3KernelAlignInt REPLICATE_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png logKernelAlign random3x3KernelAlignInt REPLICATE_PADDING +[Success] … +Testing AVX512 support +CPU does not support AVX512. +Testing NEON support +CPU does not support NEON. +[Success] … +Testing AVX512 support +CPU does not support AVX512. +Testing NEON support +CPU does not support NEON. diff --git a/test_result/vectorization/vectorization_matrix.json b/test_result/vectorization/vectorization_matrix.json new file mode 100644 index 00000000..90867db0 --- /dev/null +++ b/test_result/vectorization/vectorization_matrix.json @@ -0,0 +1,68 @@ +{ + "context": { + "date": "2025-09-07T14:30:43+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./vectorization-matrix-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [2.97803,3.27148,4.20654], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "MLIR_MatMul/1", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "MLIR_MatMul/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 36434213, + "real_time": 1.9358822269403905e+01, + "cpu_time": 1.9357879145077188e+01, + "time_unit": "ns" + }, + { + "name": "MLIR_MatVec/1", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "MLIR_MatVec/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 34006039, + "real_time": 2.0755498137698094e+01, + "cpu_time": 2.0755264822227605e+01, + "time_unit": "ns" + } + ] +} diff --git a/test_result/vectorization/vectorization_matrix.log b/test_result/vectorization/vectorization_matrix.log new file mode 100644 index 00000000..3fa79ef0 --- /dev/null +++ b/test_result/vectorization/vectorization_matrix.log @@ -0,0 +1,21 @@ +2025-09-07T14:30:43+00:00 +Running ./vectorization-matrix-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 2.98, 3.27, 4.21 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------- +MLIR_MatMul/1 19.4 ns 19.4 ns 36434213 +MLIR_MatVec/1 20.8 ns 20.8 ns 34006039 +-------------------------------------------------------- +MLIR_MatMul: MLIR MatMul Operation + Nested Loop +[ 18 18 18 18 18 18 18 18 18 18 ] +-------------------------------------------------------- +MLIR_MatVec: MLIR MatVec Operation +[ 18 18 18 18 18 18 18 18 18 18 ] diff --git a/test_result/vectorization/vectorization_result.log b/test_result/vectorization/vectorization_result.log new file mode 100644 index 00000000..350170f5 --- /dev/null +++ b/test_result/vectorization/vectorization_result.log @@ -0,0 +1,217 @@ +Vectorization Benchmark - Sun Sep 7 14:30:36 UTC 2025 +[Info] Starting vectorization-matrix-benchmark build... +[Info] Running CMake configuration... +-- Detecting CXX compiler ABI info - failed +-- Check for working CXX compiler: /usr/bin/c++ +CMake Error: CMAKE_CXX_COMPILER not set, after EnableLanguage +CMake Error at /usr/share/cmake-3.22/Modules/CMakeTestCXXCompiler.cmake:49 (try_compile): + Failed to configure test project build system. +Call Stack (most recent call first): + CMakeLists.txt:11 (project) + + +-- Configuring incomplete, errors occurred! +See also "/home/buddy-complier-workspace/buddy-benchmark/build/CMakeFiles/CMakeOutput.log". +See also "/home/buddy-complier-workspace/buddy-benchmark/build/CMakeFiles/CMakeError.log". +[Info] Building vectorization-matrix-benchmark... +ninja: error: loading 'build.ninja': No such file or directory +-- The CXX compiler identification is GNU 11.4.0 +-- The C compiler identification is GNU 11.4.0 +-- Detecting CXX compiler ABI info +-- Detecting CXX compiler ABI info - done +-- Check for working CXX compiler: /usr/bin/c++ - skipped +-- Detecting CXX compile features +-- Detecting CXX compile features - done +-- Detecting C compiler ABI info +-- Detecting C compiler ABI info - done +-- Check for working C compiler: /usr/bin/cc - skipped +-- Detecting C compile features +-- Detecting C compile features - done +-- Configuring Target Architecture: avx512f +-- Configuring Target Triple: x86_64-unknown-linux-gnu +-- Configuring benchmarks: google +-- Looking for pthread.h +-- Looking for pthread.h - found +-- Performing Test CMAKE_HAVE_LIBC_PTHREAD +-- Performing Test CMAKE_HAVE_LIBC_PTHREAD - Success +-- Found Threads: TRUE +-- Performing Test HAVE_SSE +-- Performing Test HAVE_SSE - Success +-- SSE support - yes +-- Performing Test HAVE_AVX2 +-- Performing Test HAVE_AVX2 - Success +-- AVX2 support - yes +-- Performing Test HAVE_AVX512 +-- Performing Test HAVE_AVX512 - Failed +-- AVX512 support - no +-- Performing Test HAVE_NEON +-- Performing Test HAVE_NEON - Failed +-- Arm Neon support - no +-- Configuring done +-- Generating done +-- Build files have been written to: /home/buddy-complier-workspace/buddy-benchmark/build +[Info] Building vectorization-matrix-benchmark... +[1/17] Generating mlir-matmul.o +[2/17] Generating mlir-matvec.o +[3/17] Linking CXX static library benchmarks/Vectorization/libMLIRMatVec.a +[4/17] Linking CXX static library benchmarks/Vectorization/libMLIRMatMul.a +[5/17] Creating directories for 'project_googlebenchmark' +[6/17] Performing download step (git clone) for 'project_googlebenchmark' +Cloning into 'project_googlebenchmark'... +HEAD is now at f91b6b4 bump version to 1.6 in preparation for release +[7/17] No update step for 'project_googlebenchmark' +[8/17] No patch step for 'project_googlebenchmark' +[9/17] Performing configure step for 'project_googlebenchmark' +-- The CXX compiler identification is GNU 11.4.0 +-- Detecting CXX compiler ABI info +-- Detecting CXX compiler ABI info - done +-- Check for working CXX compiler: /usr/bin/c++ - skipped +-- Detecting CXX compile features +-- Detecting CXX compile features - done +-- Failed to find LLVM FileCheck +-- Found Git: /usr/bin/git (found version "2.34.1") +-- git version: v1.6.0 normalized to 1.6.0 +-- Version: 1.6.0 +-- Performing Test HAVE_CXX_FLAG_STD_CXX11 +-- Performing Test HAVE_CXX_FLAG_STD_CXX11 - Success +-- Performing Test HAVE_CXX_FLAG_WALL +-- Performing Test HAVE_CXX_FLAG_WALL - Success +-- Performing Test HAVE_CXX_FLAG_WEXTRA +-- Performing Test HAVE_CXX_FLAG_WEXTRA - Success +-- Performing Test HAVE_CXX_FLAG_WSHADOW +-- Performing Test HAVE_CXX_FLAG_WSHADOW - Success +-- Performing Test HAVE_CXX_FLAG_WERROR +-- Performing Test HAVE_CXX_FLAG_WERROR - Success +-- Performing Test HAVE_CXX_FLAG_WSUGGEST_OVERRIDE +-- Performing Test HAVE_CXX_FLAG_WSUGGEST_OVERRIDE - Success +-- Performing Test HAVE_CXX_FLAG_PEDANTIC +-- Performing Test HAVE_CXX_FLAG_PEDANTIC - Success +-- Performing Test HAVE_CXX_FLAG_PEDANTIC_ERRORS +-- Performing Test HAVE_CXX_FLAG_PEDANTIC_ERRORS - Success +-- Performing Test HAVE_CXX_FLAG_WSHORTEN_64_TO_32 +-- Performing Test HAVE_CXX_FLAG_WSHORTEN_64_TO_32 - Failed +-- Performing Test HAVE_CXX_FLAG_FSTRICT_ALIASING +-- Performing Test HAVE_CXX_FLAG_FSTRICT_ALIASING - Success +-- Performing Test HAVE_CXX_FLAG_WNO_DEPRECATED_DECLARATIONS +-- Performing Test HAVE_CXX_FLAG_WNO_DEPRECATED_DECLARATIONS - Success +-- Performing Test HAVE_CXX_FLAG_WNO_DEPRECATED +-- Performing Test HAVE_CXX_FLAG_WNO_DEPRECATED - Success +-- Performing Test HAVE_CXX_FLAG_WSTRICT_ALIASING +-- Performing Test HAVE_CXX_FLAG_WSTRICT_ALIASING - Success +-- Performing Test HAVE_CXX_FLAG_WD654 +-- Performing Test HAVE_CXX_FLAG_WD654 - Failed +-- Performing Test HAVE_CXX_FLAG_WTHREAD_SAFETY +-- Performing Test HAVE_CXX_FLAG_WTHREAD_SAFETY - Failed +-- Performing Test HAVE_CXX_FLAG_COVERAGE +-- Performing Test HAVE_CXX_FLAG_COVERAGE - Success +-- Performing Test HAVE_STD_REGEX +CMake Warning at cmake/CXXFeatureCheck.cmake:43 (message): + If you see build failures due to cross compilation, try setting + HAVE_STD_REGEX to 0 +Call Stack (most recent call first): + CMakeLists.txt:279 (cxx_feature_check) + + +-- Performing Test HAVE_STD_REGEX -- success +-- Performing Test HAVE_GNU_POSIX_REGEX +-- Performing Test HAVE_GNU_POSIX_REGEX -- failed to compile +-- Performing Test HAVE_POSIX_REGEX +CMake Warning at cmake/CXXFeatureCheck.cmake:43 (message): + If you see build failures due to cross compilation, try setting + HAVE_POSIX_REGEX to 0 +Call Stack (most recent call first): + CMakeLists.txt:281 (cxx_feature_check) + + +-- Performing Test HAVE_POSIX_REGEX -- success +-- Performing Test HAVE_STEADY_CLOCK +CMake Warning at cmake/CXXFeatureCheck.cmake:43 (message): + If you see build failures due to cross compilation, try setting + HAVE_STEADY_CLOCK to 0 +Call Stack (most recent call first): + CMakeLists.txt:290 (cxx_feature_check) + + +-- Performing Test HAVE_STEADY_CLOCK -- success +-- Looking for C++ include pthread.h +-- Looking for C++ include pthread.h - found +-- Performing Test CMAKE_HAVE_LIBC_PTHREAD +-- Performing Test CMAKE_HAVE_LIBC_PTHREAD - Success +-- Found Threads: TRUE +-- Configuring done +-- Generating done +-- Build files have been written to: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/src/project_googlebenchmark-build +[10/17] Performing build step for 'project_googlebenchmark' +[1/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark_name.cc.o +[2/22] Building CXX object src/CMakeFiles/benchmark.dir/sleep.cc.o +[3/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark_api_internal.cc.o +[4/22] Building CXX object src/CMakeFiles/benchmark_main.dir/benchmark_main.cc.o +[5/22] Building CXX object src/CMakeFiles/benchmark.dir/colorprint.cc.o +[6/22] Building CXX object src/CMakeFiles/benchmark.dir/timers.cc.o +[7/22] Building CXX object src/CMakeFiles/benchmark.dir/perf_counters.cc.o +[8/22] Building CXX object src/CMakeFiles/benchmark.dir/counter.cc.o +[9/22] Building CXX object src/CMakeFiles/benchmark.dir/string_util.cc.o +[10/22] Building CXX object src/CMakeFiles/benchmark.dir/commandlineflags.cc.o +[11/22] Building CXX object src/CMakeFiles/benchmark.dir/reporter.cc.o +[12/22] Building CXX object src/CMakeFiles/benchmark.dir/console_reporter.cc.o +[13/22] Building CXX object src/CMakeFiles/benchmark.dir/csv_reporter.cc.o +[14/22] Building CXX object src/CMakeFiles/benchmark.dir/json_reporter.cc.o +[15/22] Building CXX object src/CMakeFiles/benchmark.dir/complexity.cc.o +[16/22] Building CXX object src/CMakeFiles/benchmark.dir/sysinfo.cc.o +[17/22] Building CXX object src/CMakeFiles/benchmark.dir/statistics.cc.o +[18/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark_runner.cc.o +[19/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark.cc.o +[20/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark_register.cc.o +[21/22] Linking CXX static library src/libbenchmark.a +[22/22] Linking CXX static library src/libbenchmark_main.a +[11/17] Performing install step for 'project_googlebenchmark' +[0/1] Install the project... +-- Install configuration: "Release" +-- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/lib/libbenchmark.a +-- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/lib/libbenchmark_main.a +-- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/include/benchmark +-- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/include/benchmark/benchmark.h +-- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/lib/cmake/benchmark/benchmarkConfig.cmake +-- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/lib/cmake/benchmark/benchmarkConfigVersion.cmake +-- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/lib/pkgconfig/benchmark.pc +-- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/lib/cmake/benchmark/benchmarkTargets.cmake +-- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/lib/cmake/benchmark/benchmarkTargets-release.cmake +-- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/share/doc/benchmark +-- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/share/doc/benchmark/AssemblyTests.md +-- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/share/doc/benchmark/_config.yml +-- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/share/doc/benchmark/dependencies.md +-- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/share/doc/benchmark/index.md +-- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/share/doc/benchmark/perf_counters.md +-- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/share/doc/benchmark/platform_specific_build_instructions.md +-- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/share/doc/benchmark/random_interleaving.md +-- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/share/doc/benchmark/releasing.md +-- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/share/doc/benchmark/tools.md +-- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/share/doc/benchmark/user_guide.md +[12/17] No test step for 'project_googlebenchmark' +[13/17] Completed 'project_googlebenchmark' +[14/17] Building CXX object benchmarks/Vectorization/CMakeFiles/vectorization-matrix-benchmark.dir/Main.cpp.o +[15/17] Building CXX object benchmarks/Vectorization/CMakeFiles/vectorization-matrix-benchmark.dir/MLIRMatMulBenchmark.cpp.o +[16/17] Building CXX object benchmarks/Vectorization/CMakeFiles/vectorization-matrix-benchmark.dir/MLIRMatVecBenchmark.cpp.o +[17/17] Linking CXX executable bin/vectorization-matrix-benchmark +[Info] Running vectorization-matrix-benchmark... +2025-09-07T14:30:43+00:00 +Running ./vectorization-matrix-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 2.98, 3.27, 4.21 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------- +MLIR_MatMul/1 19.4 ns 19.4 ns 36434213 +MLIR_MatVec/1 20.8 ns 20.8 ns 34006039 +-------------------------------------------------------- +MLIR_MatMul: MLIR MatMul Operation + Nested Loop +[ 18 18 18 18 18 18 18 18 18 18 ] +-------------------------------------------------------- +MLIR_MatVec: MLIR MatVec Operation +[ 18 18 18 18 18 18 18 18 18 18 ] diff --git a/thirdparty/README.md b/thirdparty/README.md old mode 100644 new mode 100755 diff --git a/utils/CMakeLists.txt b/utils/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/utils/plots/CMakeLists.txt b/utils/plots/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/utils/plots/Main.cpp b/utils/plots/Main.cpp old mode 100644 new mode 100755 diff --git a/utils/plots/python/plot.py b/utils/plots/python/plot.py old mode 100644 new mode 100755 diff --git a/utils/plots/python/plotools/.gitignore b/utils/plots/python/plotools/.gitignore old mode 100644 new mode 100755 diff --git a/utils/plots/python/plotools/__init__.py b/utils/plots/python/plotools/__init__.py old mode 100644 new mode 100755 diff --git a/utils/plots/python/plotools/compare.py b/utils/plots/python/plotools/compare.py old mode 100644 new mode 100755 diff --git a/utils/plots/source_dir.h.in b/utils/plots/source_dir.h.in old mode 100644 new mode 100755 diff --git a/validation/AudioProcessing/AudioValidationLib.cpp b/validation/AudioProcessing/AudioValidationLib.cpp old mode 100644 new mode 100755 diff --git a/validation/AudioProcessing/CMakeLists.txt b/validation/AudioProcessing/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/validation/CMakeLists.txt b/validation/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/validation/Python/.gitignore b/validation/Python/.gitignore old mode 100644 new mode 100755 diff --git a/validation/Python/__init__.py b/validation/Python/__init__.py old mode 100644 new mode 100755 diff --git a/validation/Python/audio/__init__.py b/validation/Python/audio/__init__.py old mode 100644 new mode 100755 diff --git a/validation/Python/audio/audio_file.py b/validation/Python/audio/audio_file.py old mode 100644 new mode 100755 diff --git a/validation/Python/audio/audio_test.py b/validation/Python/audio/audio_test.py old mode 100644 new mode 100755 diff --git a/validation/Python/audio/fir.py b/validation/Python/audio/fir.py old mode 100644 new mode 100755 diff --git a/validation/Python/main.py b/validation/Python/main.py old mode 100644 new mode 100755 diff --git a/validation/Python/requirements.txt b/validation/Python/requirements.txt old mode 100644 new mode 100755 diff --git a/validation/Python/utils/__init__.py b/validation/Python/utils/__init__.py old mode 100644 new mode 100755 diff --git a/validation/Python/utils/audio_format.py b/validation/Python/utils/audio_format.py old mode 100644 new mode 100755 diff --git a/validation/Python/utils/lib_path.py b/validation/Python/utils/lib_path.py old mode 100644 new mode 100755 diff --git a/validation/README.md b/validation/README.md old mode 100644 new mode 100755