From 9def6515c6f7b5a03350b548502c713a31cb7570 Mon Sep 17 00:00:00 2001 From: Q Liu <52538137+LIUQyou@users.noreply.github.com> Date: Sun, 30 Mar 2025 20:29:42 +0200 Subject: [PATCH 01/52] update benchmark and errors --- test/test_script_audioprocessing.sh | 47 ++ test/test_script_deeplearning.sh | 221 ++++++ test/test_script_geminiprocessing.sh | 97 +++ test/test_script_imageprocessing.sh | 58 ++ test/test_script_vectorizationprocessing.sh | 38 + .../build_results_crosscompile_summary.log | 23 + .../deeplearning/build_results_summary.log | 29 + .../deeplearning/dl-layer-ffn-benchmark.log | 18 + .../dl-layer-rmsnorm-benchmark.log | 18 + .../dl-layer-selfattention-benchmark.log | 18 + .../deeplearning/dl-model-lenet-benchmark.log | 19 + .../dl-model-mobilenetv3-benchmark.log | 19 + .../dl-model-resnet18-benchmark.log | 18 + .../dl-model-tinyllama-benchmark.log | 19 + .../dl-model-whisper-benchmark.log | 19 + .../dl-op-linalg-arithaddf-benchmark.log | 19 + .../dl-op-linalg-arithdivf-benchmark.log | 19 + .../dl-op-linalg-arithmulf-benchmark.log | 19 + .../dl-op-linalg-arithnegf-benchmark.log | 19 + .../dl-op-linalg-arithsubf-benchmark.log | 19 + .../dl-op-linalg-batch-matmul-benchmark.log | 25 + ...l-op-linalg-conv2d-nchw-fchw-benchmark.log | 19 + ...l-op-linalg-conv2d-nhwc-fhwc-benchmark.log | 21 + ...l-op-linalg-conv2d-nhwc-hwcf-benchmark.log | 19 + ...g-depthwise-conv-2d-nhwc-hwc-benchmark.log | 19 + .../dl-op-linalg-mathexp-benchmark.log | 19 + .../dl-op-linalg-mathfpow-benchmark.log | 19 + .../dl-op-linalg-mathrsqrt-benchmark.log | 19 + .../dl-op-linalg-matmul-benchmark.log | 22 + ...l-op-linalg-pooling-nhwc-sum-benchmark.log | 19 + .../dl-op-linalg-reduceaddf-benchmark.log | 10 + .../dl-op-linalg-reducemaxf-benchmark.log | 10 + ...p-linalg-softmax-exp-sum-div-benchmark.log | 19 + .../dl-op-matmul-transpose-b-benchmark.log | 21 + .../dl-op-tosa-transpose-benchmark.log | 17 + .../deeplearning/run_results_summary.log | 29 + test_result/geminiprocessing/build.log | 
655 ++++++++++++++++++ .../geminiprocessing/cmake_configure.log | 37 + 38 files changed, 1755 insertions(+) create mode 100755 test/test_script_audioprocessing.sh create mode 100755 test/test_script_deeplearning.sh create mode 100755 test/test_script_geminiprocessing.sh create mode 100755 test/test_script_imageprocessing.sh create mode 100755 test/test_script_vectorizationprocessing.sh create mode 100644 test_result/deeplearning/build_results_crosscompile_summary.log create mode 100644 test_result/deeplearning/build_results_summary.log create mode 100644 test_result/deeplearning/dl-layer-ffn-benchmark.log create mode 100644 test_result/deeplearning/dl-layer-rmsnorm-benchmark.log create mode 100644 test_result/deeplearning/dl-layer-selfattention-benchmark.log create mode 100644 test_result/deeplearning/dl-model-lenet-benchmark.log create mode 100644 test_result/deeplearning/dl-model-mobilenetv3-benchmark.log create mode 100644 test_result/deeplearning/dl-model-resnet18-benchmark.log create mode 100644 test_result/deeplearning/dl-model-tinyllama-benchmark.log create mode 100644 test_result/deeplearning/dl-model-whisper-benchmark.log create mode 100644 test_result/deeplearning/dl-op-linalg-arithaddf-benchmark.log create mode 100644 test_result/deeplearning/dl-op-linalg-arithdivf-benchmark.log create mode 100644 test_result/deeplearning/dl-op-linalg-arithmulf-benchmark.log create mode 100644 test_result/deeplearning/dl-op-linalg-arithnegf-benchmark.log create mode 100644 test_result/deeplearning/dl-op-linalg-arithsubf-benchmark.log create mode 100644 test_result/deeplearning/dl-op-linalg-batch-matmul-benchmark.log create mode 100644 test_result/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.log create mode 100644 test_result/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.log create mode 100644 test_result/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.log create mode 100644 test_result/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.log 
create mode 100644 test_result/deeplearning/dl-op-linalg-mathexp-benchmark.log
 create mode 100644 test_result/deeplearning/dl-op-linalg-mathfpow-benchmark.log
 create mode 100644 test_result/deeplearning/dl-op-linalg-mathrsqrt-benchmark.log
 create mode 100644 test_result/deeplearning/dl-op-linalg-matmul-benchmark.log
 create mode 100644 test_result/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.log
 create mode 100644 test_result/deeplearning/dl-op-linalg-reduceaddf-benchmark.log
 create mode 100644 test_result/deeplearning/dl-op-linalg-reducemaxf-benchmark.log
 create mode 100644 test_result/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.log
 create mode 100644 test_result/deeplearning/dl-op-matmul-transpose-b-benchmark.log
 create mode 100644 test_result/deeplearning/dl-op-tosa-transpose-benchmark.log
 create mode 100644 test_result/deeplearning/run_results_summary.log
 create mode 100644 test_result/geminiprocessing/build.log
 create mode 100644 test_result/geminiprocessing/cmake_configure.log

diff --git a/test/test_script_audioprocessing.sh b/test/test_script_audioprocessing.sh
new file mode 100755
index 00000000..e16ae655
--- /dev/null
+++ b/test/test_script_audioprocessing.sh
@@ -0,0 +1,47 @@
+#!/usr/bin/env bash
+
+export BUDDY_MLIR_BUILD_DIR=/home/buddy-complier-workspace/buddy-mlir/build
+export LLVM_MLIR_BUILD_DIR=/home/buddy-complier-workspace/buddy-mlir/llvm/build
+cd /home/buddy-complier-workspace/buddy-benchmark || exit 1
+mkdir -p build && cd build || exit 1
+cmake -G Ninja .. \
+    -DCMAKE_BUILD_TYPE=RELEASE \
+    -DAUDIO_PROCESSING_BENCHMARKS=ON \
+    -DCMAKE_CXX_COMPILER="${LLVM_MLIR_BUILD_DIR}/bin/clang++" \
+    -DKFR_DIR=/home/buddy-complier-workspace/buddy-benchmark/thirdparty/kfr \
+    -DBUDDY_MLIR_BUILD_DIR="${BUDDY_MLIR_BUILD_DIR}"
+ninja dap-op-iir-benchmark
+cd bin
+./dap-op-iir-benchmark
+
+# Return to the build directory: the configure/build steps below use `..` and `bin` relative to it.
+cd .. || exit 1
+cmake -G Ninja .. \
+    -DCMAKE_BUILD_TYPE=RELEASE \
+    -DAUDIO_PROCESSING_BENCHMARKS=ON \
+    -DCMAKE_CXX_COMPILER="${LLVM_MLIR_BUILD_DIR}/bin/clang++" \
+    -DKFR_DIR=/home/buddy-complier-workspace/buddy-benchmark/thirdparty/kfr \
+    -DBUDDY_MLIR_BUILD_DIR="${BUDDY_MLIR_BUILD_DIR}" \
+    -DPYTHON_BINARY_DIR="$(dirname "$(command -v python3)")"
+
+ninja audio-plot
+cd bin
+./audio-plot ../../benchmarks/AudioProcessing/Audios/NASA_Mars.wav ResultKFRIir.wav
+# "
+# root@4f445bb41579:/home/buddy-complier-workspace/buddy-benchmark/build/bin# ./audio-plot ../../benchmarks/AudioProcessing/Audios/NASA_Mars.wav ResultKFRIir.wav
+# Plotting now...
+# Traceback (most recent call last):
+# File "/home/buddy-complier-workspace/buddy-benchmark/utils/plots/python/plot.py", line 71, in
+# compare_wave(args.file1, args.file2, part=args.part,
+# File "/home/buddy-complier-workspace/buddy-benchmark/utils/plots/python/plotools/compare.py", line 120, in compare_wave
+# after, time2 = get_time_domain(file2)
+# File "/home/buddy-complier-workspace/buddy-benchmark/utils/plots/python/plotools/compare.py", line 60, in get_time_domain
+# info, samples = get_info_and_samples(file)
+# File "/home/buddy-complier-workspace/buddy-benchmark/utils/plots/python/plotools/compare.py", line 38, in get_info_and_samples
+# with wave.open(file, 'rb') as audio:
+# File "/usr/lib/python3.10/wave.py", line 509, in open
+# return Wave_read(f)
+# File "/usr/lib/python3.10/wave.py", line 159, in __init__
+# f = builtins.open(f, 'rb')
+# FileNotFoundError: [Errno 2] No such file or directory: 'ResultKFRIir.wav'
+# "
\ No newline at end of file
diff --git a/test/test_script_deeplearning.sh b/test/test_script_deeplearning.sh
new file mode 100755
index 00000000..f7c4d72e
--- /dev/null
+++ b/test/test_script_deeplearning.sh
@@ -0,0 +1,221 @@
+#!/usr/bin/env bash
+
+################################################################################
+# 0. 
Script Setup
+################################################################################
+# We disable "exit on error" so that if one benchmark fails to build or run,
+# we can continue with the rest.
+set +e
+
+################################################################################
+# 1. (Optional) Activate Python/Conda Environment
+################################################################################
+# Uncomment or adjust if you use Anaconda/Miniconda:
+# conda activate
+
+
+################################################################################
+# 2. Benchmark Targets to Build and Run (Continue Even If One Fails)
+################################################################################
+BENCHMARK_TARGETS=(
+  # ------------------
+  # Model-Level
+  # ------------------
+  "dl-model-tinyllama-benchmark"
+  "dl-model-mobilenetv3-benchmark"
+  "dl-model-lenet-benchmark"
+  "dl-model-bert-benchmark"
+  "dl-model-whisper-benchmark"
+  "dl-model-resnet18-benchmark"
+
+  # ------------------
+  # Layer-Level
+  # ------------------
+  "dl-layer-ffn-benchmark"
+  "dl-layer-selfattention-benchmark"
+  "dl-layer-rmsnorm-benchmark"
+
+  # ------------------
+  # Operation-Level
+  # ------------------
+  "dl-op-linalg-matmul-benchmark"
+  "dl-op-linalg-conv2d-nchw-fchw-benchmark"
+  "dl-op-linalg-conv2d-nhwc-hwcf-benchmark"
+  "dl-op-linalg-conv2d-nhwc-fhwc-benchmark"
+  "dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark"
+  "dl-op-linalg-pooling-nhwc-sum-benchmark"
+  "dl-op-linalg-batch-matmul-benchmark"
+  "dl-op-linalg-arithaddf-benchmark"
+  "dl-op-linalg-arithdivf-benchmark"
+  "dl-op-linalg-arithmulf-benchmark"
+  "dl-op-linalg-arithnegf-benchmark"
+  "dl-op-linalg-arithsubf-benchmark"
+  "dl-op-linalg-mathfpow-benchmark"
+  "dl-op-linalg-mathrsqrt-benchmark"
+  "dl-op-linalg-mathexp-benchmark"
+  "dl-op-linalg-reduceaddf-benchmark"
+  "dl-op-linalg-reducemaxf-benchmark"
+  "dl-op-linalg-softmax-exp-sum-div-benchmark"
+  "dl-op-tosa-transpose-benchmark"
+  "dl-op-matmul-transpose-b-benchmark"
+)
+
+
+################################################################################
+# 3a. Set Environment Variables for Buddy MLIR/LLVM
+################################################################################
+# Adjust these paths according to your local setup:
+BUDDY_MLIR_DIR="/home/buddy-complier-workspace/buddy-mlir" # The root directory of buddy-mlir
+LLVM_BUILD_DIR="$BUDDY_MLIR_DIR/llvm/build" # The build dir for LLVM
+BUDDY_BUILD_DIR="$BUDDY_MLIR_DIR/build" # The build dir for buddy-mlir
+
+# Export environment variables (the old PYTHONPATH is appended only when non-empty, so no trailing ':' puts the CWD on the import path):
+export BUDDY_MLIR_BUILD_DIR="$BUDDY_BUILD_DIR"
+export LLVM_MLIR_BUILD_DIR="$LLVM_BUILD_DIR"
+export PYTHONPATH="${LLVM_BUILD_DIR}/tools/mlir/python_packages/mlir_core:${BUDDY_BUILD_DIR}/python_packages${PYTHONPATH:+:${PYTHONPATH}}"
+export BENCHMARK_PATH="${BUDDY_MLIR_DIR}/../buddy-benchmark"
+echo "[Info] BUDDY_MLIR_BUILD_DIR = ${BUDDY_MLIR_BUILD_DIR}"
+echo "[Info] LLVM_MLIR_BUILD_DIR = ${LLVM_MLIR_BUILD_DIR}"
+echo "[Info] PYTHONPATH = ${PYTHONPATH}"
+
+################################################################################
+# 3b. Prepare Build Folder and Run CMake
+################################################################################
+cd "${BUDDY_MLIR_DIR}/../buddy-benchmark" || exit 1
+mkdir -p build
+cd build || exit 1
+
+echo "[Info] Running CMake configuration..."
+cmake -G Ninja .. \
+    -DDEEP_LEARNING_BENCHMARKS=ON \
+    -DCMAKE_BUILD_TYPE=Release \
+    -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \
+    -DBUDDY_MLIR_BUILD_DIR="${BUDDY_MLIR_BUILD_DIR}" \
+    -DCMAKE_CXX_COMPILER="${LLVM_MLIR_BUILD_DIR}/bin/clang++" \
+    -DCMAKE_C_COMPILER="${LLVM_MLIR_BUILD_DIR}/bin/clang" \
+    -DCMAKE_CXX_FLAGS="-march=native" \
+    -DCMAKE_C_FLAGS="-march=native"
+
+
+################################################################################
+# 4. 
Build Each Benchmark with Ninja (Continue Even If One Fails)
+################################################################################
+
+mkdir -p "${BENCHMARK_PATH}/test_result"
+mkdir -p "${BENCHMARK_PATH}/test_result/deeplearning"
+BUILD_LOG="${BENCHMARK_PATH}/test_result/deeplearning/build_results_summary.log"
+: > "${BUILD_LOG}" # Clear/create the file
+
+echo "[Info] Building all benchmarks with Ninja..."
+for target in "${BENCHMARK_TARGETS[@]}"; do
+  echo "==> ninja ${target}"
+  if ninja "${target}"; then
+    echo "[Success] Build of '${target}'" | tee -a "${BUILD_LOG}"
+  else
+    echo "[Failed] Build of '${target}'" | tee -a "${BUILD_LOG}"
+  fi
+done
+
+################################################################################
+# 5. Run Each Benchmark & Redirect Output (Continue Even If One Fails)
+################################################################################
+cd bin || exit 1
+
+RUN_LOG="${BENCHMARK_PATH}/test_result/deeplearning/run_results_summary.log"
+: > "${RUN_LOG}" # Clear/create the file
+
+echo "[Info] Running all benchmarks in ./bin..."
+for target in "${BENCHMARK_TARGETS[@]}"; do
+  if [ -f "${target}" ]; then
+    echo "==> Running ${target}"
+    if "./${target}" > "${BENCHMARK_PATH}/test_result/deeplearning/${target}.log" 2>&1; then
+      echo "[Success] Run of '${target}'" | tee -a "${RUN_LOG}"
+      echo " Output saved to test_result/deeplearning/${target}.log"
+    else
+      echo "[Failed] Run of '${target}'" | tee -a "${RUN_LOG}"
+      echo " Output saved to test_result/deeplearning/${target}.log (May contain error info)"
+    fi
+  else
+    echo "[Missing] Executable not found for '${target}'" | tee -a "${RUN_LOG}"
+  fi
+done
+
+
+################################################################################
+# 6. 
Set Environment Variables for Buddy MLIR/LLVM for cross-compile
+################################################################################
+# Adjust these paths according to your local setup:
+BUDDY_MLIR_DIR="/home/buddy-complier-workspace/buddy-mlir" # The root directory of buddy-mlir
+LLVM_BUILD_DIR="$BUDDY_MLIR_DIR/llvm/build" # The build dir for LLVM
+BUDDY_BUILD_DIR="$BUDDY_MLIR_DIR/build" # The build dir for buddy-mlir
+
+# Export environment variables (the old PYTHONPATH is appended only when non-empty, so no trailing ':' puts the CWD on the import path):
+export BUDDY_MLIR_BUILD_DIR="$BUDDY_BUILD_DIR"
+export LLVM_MLIR_BUILD_DIR="$LLVM_BUILD_DIR"
+export PYTHONPATH="${LLVM_BUILD_DIR}/tools/mlir/python_packages/mlir_core:${BUDDY_BUILD_DIR}/python_packages${PYTHONPATH:+:${PYTHONPATH}}"
+export BUDDY_MLIR_BUILD_CROSS_DIR=${BUDDY_MLIR_BUILD_DIR}/../build
+export RISCV_GNU_TOOLCHAIN=${BUDDY_MLIR_BUILD_DIR}/../thirdparty/riscv-gnu-toolchain
+export RISCV_OMP_SHARED=${LLVM_MLIR_BUILD_DIR}/../build/lib/libomp.so
+export BENCHMARK_PATH="${BUDDY_MLIR_DIR}/../buddy-benchmark"
+
+echo "[Info] BUDDY_MLIR_BUILD_DIR = ${BUDDY_MLIR_BUILD_DIR}"
+echo "[Info] LLVM_MLIR_BUILD_DIR = ${LLVM_MLIR_BUILD_DIR}"
+echo "[Info] PYTHONPATH = ${PYTHONPATH}"
+
+################################################################################
+# 7. Prepare Build Folder and Run CMake
+################################################################################
+cd "${BUDDY_MLIR_DIR}/../buddy-benchmark" || exit 1
+mkdir -p build
+cd build || exit 1
+
+echo "[Info] Running CMake configuration..."
+cmake -G Ninja .. \
+    -DDEEP_LEARNING_BENCHMARKS=ON \
+    -DCMAKE_BUILD_TYPE=RELEASE \
+    -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \
+    -DCROSS_COMPILE_RVV=ON \
+    -DCMAKE_SYSTEM_NAME=Linux \
+    -DCMAKE_SYSTEM_PROCESSOR=riscv \
+    -DCMAKE_C_COMPILER="${LLVM_MLIR_BUILD_DIR}/bin/clang" \
+    -DRISCV_GNU_TOOLCHAIN="${RISCV_GNU_TOOLCHAIN}" \
+    -DCMAKE_CXX_COMPILER="${LLVM_MLIR_BUILD_DIR}/bin/clang++" \
+    -DCMAKE_C_FLAGS="-march=rv64gcv --target=riscv64-unknown-linux-gnu --sysroot=${RISCV_GNU_TOOLCHAIN}/sysroot --gcc-toolchain=${RISCV_GNU_TOOLCHAIN} -fPIC" \
+    -DCMAKE_CXX_FLAGS="-march=rv64gcv --target=riscv64-unknown-linux-gnu --sysroot=${RISCV_GNU_TOOLCHAIN}/sysroot --gcc-toolchain=${RISCV_GNU_TOOLCHAIN} -fPIC" \
+    -DRISCV_OMP_SHARED="${RISCV_OMP_SHARED}" \
+    -DBUDDY_MLIR_BUILD_DIR="${BUDDY_MLIR_BUILD_DIR}" \
+    -DBUDDY_MLIR_BUILD_CROSS_DIR="${BUDDY_MLIR_BUILD_CROSS_DIR}" \
+    -DBUDDY_MLIR_CROSS_LIB_DIR="${BUDDY_MLIR_BUILD_CROSS_DIR}/lib"
+
+################################################################################
+# 8. Build Each Benchmark for cross-compile (Continue Even If One Fails)
+################################################################################
+
+mkdir -p "${BENCHMARK_PATH}/test_result/deeplearning"
+BUILD_LOG="${BENCHMARK_PATH}/test_result/deeplearning/build_results_crosscompile_summary.log"
+: > "${BUILD_LOG}" # Clear/create the file
+
+echo "[Info] Building all benchmarks with Ninja..."
+for target in "${BENCHMARK_TARGETS[@]}"; do
+  echo "==> ninja ${target}"
+  if ninja "${target}"; then
+    echo "[Success] Build of '${target}'" | tee -a "${BUILD_LOG}"
+  else
+    echo "[Failed] Build of '${target}'" | tee -a "${BUILD_LOG}"
+  fi
+done
+
+
+echo
+echo "[Info] All build/run steps completed (script did not stop on failures)."
+echo "[Info] Build summary: ${BUILD_LOG}"
+echo "[Info] Run summary: ${RUN_LOG}"
+
+# NOTE(review): the buddy-mlir configure command below looks pasted in by mistake (it would reconfigure buddy-benchmark/build); kept for reference, disabled.
+# cmake -G Ninja .. \
+#     -DMLIR_DIR=$PWD/../llvm/build/lib/cmake/mlir \
+#     -DLLVM_DIR=$PWD/../llvm/build/lib/cmake/llvm \
+#     -DLLVM_ENABLE_ASSERTIONS=ON \
+#     -DCMAKE_BUILD_TYPE=RELEASE \
+#     -DBUDDY_MLIR_ENABLE_PYTHON_PACKAGES=ON \
+#     -DPython3_EXECUTABLE=$(which python3)
\ No newline at end of file
diff --git a/test/test_script_geminiprocessing.sh b/test/test_script_geminiprocessing.sh
new file mode 100755
index 00000000..b151cb5b
--- /dev/null
+++ b/test/test_script_geminiprocessing.sh
@@ -0,0 +1,97 @@
+#!/usr/bin/env bash
+
+export BUDDY_MLIR_BUILD_DIR=/home/buddy-complier-workspace/buddy-mlir/build
+export LLVM_MLIR_BUILD_DIR=/home/buddy-complier-workspace/buddy-mlir/llvm/build
+export CHIPYARD_DIR=/home/buddy-complier-workspace/chipyard
+export BUDDY_BENCHMARK_DIR=/home/buddy-complier-workspace/buddy-benchmark
+
+cd "${CHIPYARD_DIR}" || exit 1
+git config --global --add safe.directory /home/buddy-complier-workspace/chipyard
+git checkout 1.8.1
+
+# Initialize and update the 'generators/gemmini' submodule and any submodules inside it.
+git config --global --add safe.directory /home/buddy-complier-workspace/chipyard/generators/gemmini
+git submodule update --init --recursive generators/gemmini
+
+#############################################
+# 1. Initialize Conda for the current shell
+#############################################
+eval "$(conda shell.bash hook)" # 'conda init' only edits ~/.bashrc; the hook enables 'conda activate' in this shell
+
+#############################################
+# 2. Check if 'chipyard' environment exists
+#############################################
+if conda env list | grep -qE '^chipyard[[:space:]]'; then
+  echo "[INFO] Found existing 'chipyard' environment. Activating it."
+else
+  echo "[INFO] 'chipyard' environment not found. Creating it..."
+  # Example creation command - adjust packages as needed
+  conda create -y -n chipyard python=3.10 \
+    cmake ninja
+  # plus any other dependencies needed...
+fi
+
+conda activate chipyard
+
+#############################################
+# 3. 
Source build-setup and env.sh
+#############################################
+# If your script uses conda-lock or has pinned requirements,
+# you might need to call build-setup.sh so it *creates* the
+# .conda-env environment. But be sure it doesn’t conflict
+# with your newly created 'chipyard' environment.
+source build-setup.sh esp-tools
+source env.sh
+
+#############################################
+# 4. Proceed with your build
+#############################################
+cd "${BUDDY_BENCHMARK_DIR}" || exit 1
+# Remove any existing build directory and create a fresh one (only after the cd above succeeded).
+rm -rf build
+mkdir -p build && cd build || exit 1
+
+RESULT_DIR="${BUDDY_BENCHMARK_DIR}/test_result/geminiprocessing"
+mkdir -p "${RESULT_DIR}"
+
+export C_PATH=$(command -v riscv64-unknown-linux-gnu-gcc)
+export CXX_PATH=$(command -v riscv64-unknown-linux-gnu-g++)
+export CLinker_PATH=$(command -v riscv64-unknown-linux-gnu-ld)
+
+# Print the resolved tool paths and directories
+echo "[Info] C_COMPILER_PATH = ${C_PATH}"
+echo "[Info] CXX_COMPILER_PATH = ${CXX_PATH}"
+echo "[Info] C_LINKER_PATH = ${CLinker_PATH}"
+echo "[Info] BUDDY_MLIR_BUILD_DIR = ${BUDDY_MLIR_BUILD_DIR}"
+echo "[Info] LLVM_MLIR_BUILD_DIR = ${LLVM_MLIR_BUILD_DIR}"
+echo "[Info] CHIPYARD_DIR = ${CHIPYARD_DIR}"
+echo "[Info] BUDDY_BENCHMARK_DIR = ${BUDDY_BENCHMARK_DIR}"
+echo "[Info] RESULT_DIR = ${RESULT_DIR}"
+
+echo "[Info] Running CMake configuration..."
+cmake -G Ninja .. \
+    -DCMAKE_C_COMPILER="${C_PATH}" \
+    -DCMAKE_CXX_COMPILER="${CXX_PATH}" \
+    -DCMAKE_LINKER="${CLinker_PATH}" \
+    -DCMAKE_BUILD_TYPE=RELEASE \
+    -DBUDDY_MLIR_BUILD_DIR="${BUDDY_MLIR_BUILD_DIR}" \
+    -DGEMMINI_INCLUDE_DIR="${CHIPYARD_DIR}/generators/gemmini/software/gemmini-rocc-tests/include/" \
+    -DGEMMINI_BENCHMARKS=ON \
+    2>&1 | tee "${RESULT_DIR}/cmake_configure.log"
+
+ninja 2>&1 | tee "${RESULT_DIR}/build.log"
+
+# ```[1/21] Creating directories for 'project_googlebenchmark'
+# [2/21] Building C object benchmarks/Gemmini/Ops/MatMulOp/CMakeFiles/ExoMatMul.dir/ExoMatmul.c.o
+# FAILED: benchmarks/Gemmini/Ops/MatMulOp/CMakeFiles/ExoMatMul.dir/ExoMatmul.c.o
+# riscv64-unknown-linux-gnu-gcc -I/home/buddy-complier-workspace/buddy-mlir/build/cmake/../../frontend/Interfaces -I/home/buddy-complier-workspace/buddy-mlir/build/cmake/../../thirdparty/include -I/home/buddy-complier-workspace/buddy-benchmark/benchmarks -I/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include -I/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/.. 
-I/home/xychen/buddy-mlir/frontend/Interfaces -O3 -DNDEBUG -MD -MT benchmarks/Gemmini/Ops/MatMulOp/CMakeFiles/ExoMatMul.dir/ExoMatmul.c.o -MF benchmarks/Gemmini/Ops/MatMulOp/CMakeFiles/ExoMatMul.dir/ExoMatmul.c.o.d -o benchmarks/Gemmini/Ops/MatMulOp/CMakeFiles/ExoMatMul.dir/ExoMatmul.c.o -c /home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c
+# /home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c: In function '_exo_matmul_4':
+# /home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:28:47: error: macro "gemmini_extended_config_ex" requires 7 arguments, but only 6 given
+# 28 | gemmini_extended_config_ex(WS, 0, 0, 1, 0, 0);
+# | ^
+# In file included from /home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:23:```
+
+# cd bin
+# ./vectorization-matrix-benchmark 2>&1 | tee "${RESULT_DIR}/run.log"
+
+echo "[Info] CMake and build logs are stored in ${RESULT_DIR}"
diff --git a/test/test_script_imageprocessing.sh b/test/test_script_imageprocessing.sh
new file mode 100755
index 00000000..ab89c912
--- /dev/null
+++ b/test/test_script_imageprocessing.sh
@@ -0,0 +1,58 @@
+#!/usr/bin/env bash
+
+# NEW: Create results directory and update log file path
+RESULT_DIR="${PWD}/test_result/imageprocessing"
+mkdir -p "$RESULT_DIR"
+LOG="${RESULT_DIR}/image-processing-result.log"
+echo "Benchmark results - $(date)" > "$LOG"
+
+# Function to check CPU flag support ($1 = feature name, matched case-insensitively)
+supports() {
+  # Lower-case the feature name, then look for it anywhere in /proc/cpuinfo.
+  # Declaration and assignment are split so `local` cannot mask a failure.
+  local flag
+  flag=$(printf '%s' "$1" | tr '[:upper:]' '[:lower:]')
+  # grep's exit status is the function's result: 0 = found, non-zero = not found.
+  grep -qi -- "$flag" /proc/cpuinfo
+}
+
+features=("SSE" "AVX2" "AVX512" "NEON")
+images=("../benchmarks/ImageProcessing/Images/YuTu.png")
+kernels=("prewittKernelAlign" "sobel3x3KernelAlign" "sobel5x5KernelAlign" "sobel7x7KernelAlign" "sobel9x9KernelAlign" "laplacianKernelAlign" "logKernelAlign")
+kernelmorphs=("random3x3KernelAlignInt")
+boundaries=("CONSTANT_PADDING" "REPLICATE_PADDING")
+
+for feature in "${features[@]}"; do
+  echo "Testing $feature support" | tee -a "$LOG"
+  if supports "$feature"; then
+    echo "$feature is supported." | tee -a "$LOG"
+    { mkdir -p "build_${feature}" && cd "build_${feature}"; } || continue
+    cmake -G Ninja .. \
+      -DCMAKE_BUILD_TYPE=RELEASE \
+      -DIMAGE_PROCESSING_BENCHMARKS=ON \
+      -DOpenCV_DIR="$PWD/../thirdparty/opencv/build/" \
+      -DEIGEN_DIR="$PWD/../thirdparty/eigen/" \
+      -DBUDDY_OPT_ATTR="$(echo "$feature" | tr '[:upper:]' '[:lower:]')" \
+      -DBUDDY_MLIR_BUILD_DIR=/home/buddy-complier-workspace/buddy-mlir/build
+    ninja image-processing-benchmark
+    echo "Running image-processing-benchmark for $feature" | tee -a "$LOG"
+    for img in "${images[@]}"; do
+      for kern in "${kernels[@]}"; do
+        for morph in "${kernelmorphs[@]}"; do
+          for boundary in "${boundaries[@]}"; do
+            echo "Running: $img $kern $morph $boundary" | tee -a "$LOG"
+            ./bin/image-processing-benchmark "$img" "$kern" "$morph" "$boundary" 2>&1 | grep -v "Saved PNG file." >> "$LOG"
+          done
+        done
+      done
+    done
+    cd ..
+  else
+    echo "CPU does not support $feature." | tee -a "$LOG"
+  fi
+done
+
+# NEW: Clean up build directories
+for feature in "${features[@]}"; do
+  rm -rf "build_${feature}"
+done
\ No newline at end of file
diff --git a/test/test_script_vectorizationprocessing.sh b/test/test_script_vectorizationprocessing.sh
new file mode 100755
index 00000000..97899922
--- /dev/null
+++ b/test/test_script_vectorizationprocessing.sh
@@ -0,0 +1,38 @@
+#!/usr/bin/env bash
+
+################################################################################
+# 1. 
Script Setup
+################################################################################
+set -eo pipefail # pipefail: every step below is piped into tee, which would otherwise hide cmake/ninja/benchmark failures from -e
+BUDDY_MLIR_BUILD_DIR="/home/buddy-complier-workspace/buddy-mlir/build"
+LLVM_MLIR_BUILD_DIR="/home/buddy-complier-workspace/buddy-mlir/llvm/build"
+
+echo "[Info] BUDDY_MLIR_BUILD_DIR = ${BUDDY_MLIR_BUILD_DIR}"
+echo "[Info] LLVM_MLIR_BUILD_DIR = ${LLVM_MLIR_BUILD_DIR}"
+
+RESULT_DIR="${PWD}/test_result/vectorization"
+mkdir -p "${RESULT_DIR}"
+LOG_FILE="${RESULT_DIR}/vectorization_result.log"
+echo "Vectorization Benchmark - $(date)" > "${LOG_FILE}"
+
+################################################################################
+# 2. Build Benchmark
+################################################################################
+mkdir -p build && cd build
+echo "[Info] Running CMake configuration..." | tee -a "${LOG_FILE}"
+cmake -G Ninja .. \
+    -DCMAKE_BUILD_TYPE=RELEASE \
+    -DVECTORIZATION_BENCHMARKS=ON \
+    -DBUDDY_MLIR_BUILD_DIR="${BUDDY_MLIR_BUILD_DIR}" 2>&1 | tee -a "${LOG_FILE}"
+
+echo "[Info] Building vectorization-matrix-benchmark..." | tee -a "${LOG_FILE}"
+ninja vectorization-matrix-benchmark 2>&1 | tee -a "${LOG_FILE}"
+
+################################################################################
+# 3. Run Benchmark
+################################################################################
+cd bin
+echo "[Info] Running vectorization-matrix-benchmark..." | tee -a "${LOG_FILE}"
+./vectorization-matrix-benchmark 2>&1 | tee -a "${LOG_FILE}"
+
+echo "[Info] Benchmark completed. 
Log saved to ${LOG_FILE}" \ No newline at end of file diff --git a/test_result/deeplearning/build_results_crosscompile_summary.log b/test_result/deeplearning/build_results_crosscompile_summary.log new file mode 100644 index 00000000..df608647 --- /dev/null +++ b/test_result/deeplearning/build_results_crosscompile_summary.log @@ -0,0 +1,23 @@ +[Failed] Build of 'dl-model-tinyllama-benchmark' +[Failed] Build of 'dl-model-mobilenetv3-benchmark' +[Failed] Build of 'dl-model-lenet-benchmark' +[Failed] Build of 'dl-model-bert-benchmark' +[Failed] Build of 'dl-model-whisper-benchmark' +[Failed] Build of 'dl-model-resnet18-benchmark' +[Failed] Build of 'dl-layer-ffn-benchmark' +[Failed] Build of 'dl-layer-selfattention-benchmark' +[Failed] Build of 'dl-layer-rmsnorm-benchmark' +[Failed] Build of 'dl-op-linalg-matmul-benchmark' +[Failed] Build of 'dl-op-linalg-conv2d-nchw-fchw-benchmark' +[Failed] Build of 'dl-op-linalg-conv2d-nhwc-hwcf-benchmark' +[Failed] Build of 'dl-op-linalg-conv2d-nhwc-fhwc-benchmark' +[Failed] Build of 'dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark' +[Failed] Build of 'dl-op-linalg-pooling-nhwc-sum-benchmark' +[Failed] Build of 'dl-op-linalg-batch-matmul-benchmark' +[Failed] Build of 'dl-op-linalg-arithaddf-benchmark' +[Failed] Build of 'dl-op-linalg-arithdivf-benchmark' +[Failed] Build of 'dl-op-linalg-arithmulf-benchmark' +[Failed] Build of 'dl-op-linalg-arithnegf-benchmark' +[Failed] Build of 'dl-op-linalg-arithsubf-benchmark' +[Failed] Build of 'dl-op-linalg-mathfpow-benchmark' +[Failed] Build of 'dl-op-linalg-mathrsqrt-benchmark' diff --git a/test_result/deeplearning/build_results_summary.log b/test_result/deeplearning/build_results_summary.log new file mode 100644 index 00000000..de1252ba --- /dev/null +++ b/test_result/deeplearning/build_results_summary.log @@ -0,0 +1,29 @@ +[Success] Build of 'dl-model-tinyllama-benchmark' +[Success] Build of 'dl-model-mobilenetv3-benchmark' +[Success] Build of 'dl-model-lenet-benchmark' +[Failed] Build 
of 'dl-model-bert-benchmark' +[Success] Build of 'dl-model-whisper-benchmark' +[Success] Build of 'dl-model-resnet18-benchmark' +[Success] Build of 'dl-layer-ffn-benchmark' +[Success] Build of 'dl-layer-selfattention-benchmark' +[Success] Build of 'dl-layer-rmsnorm-benchmark' +[Success] Build of 'dl-op-linalg-matmul-benchmark' +[Success] Build of 'dl-op-linalg-conv2d-nchw-fchw-benchmark' +[Success] Build of 'dl-op-linalg-conv2d-nhwc-hwcf-benchmark' +[Success] Build of 'dl-op-linalg-conv2d-nhwc-fhwc-benchmark' +[Success] Build of 'dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark' +[Success] Build of 'dl-op-linalg-pooling-nhwc-sum-benchmark' +[Success] Build of 'dl-op-linalg-batch-matmul-benchmark' +[Success] Build of 'dl-op-linalg-arithaddf-benchmark' +[Success] Build of 'dl-op-linalg-arithdivf-benchmark' +[Success] Build of 'dl-op-linalg-arithmulf-benchmark' +[Success] Build of 'dl-op-linalg-arithnegf-benchmark' +[Success] Build of 'dl-op-linalg-arithsubf-benchmark' +[Success] Build of 'dl-op-linalg-mathfpow-benchmark' +[Success] Build of 'dl-op-linalg-mathrsqrt-benchmark' +[Success] Build of 'dl-op-linalg-mathexp-benchmark' +[Success] Build of 'dl-op-linalg-reduceaddf-benchmark' +[Success] Build of 'dl-op-linalg-reducemaxf-benchmark' +[Success] Build of 'dl-op-linalg-softmax-exp-sum-div-benchmark' +[Success] Build of 'dl-op-tosa-transpose-benchmark' +[Success] Build of 'dl-op-matmul-transpose-b-benchmark' diff --git a/test_result/deeplearning/dl-layer-ffn-benchmark.log b/test_result/deeplearning/dl-layer-ffn-benchmark.log new file mode 100644 index 00000000..677ea1bb --- /dev/null +++ b/test_result/deeplearning/dl-layer-ffn-benchmark.log @@ -0,0 +1,18 @@ +2025-03-30T12:12:58+00:00 +Running ./dl-layer-ffn-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 1.00, 1.39, 6.06 +***WARNING*** CPU scaling is enabled, the benchmark real time 
measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------- +DL_LAYER_FFN/Scalar 0.065 ms 0.065 ms 10641 +DL_LAYER_FFN/Auto_Vectorization 0.027 ms 0.027 ms 26024 +----------------------------------------------------------- +Correctness Verification: PASS +----------------------------------------------------------- diff --git a/test_result/deeplearning/dl-layer-rmsnorm-benchmark.log b/test_result/deeplearning/dl-layer-rmsnorm-benchmark.log new file mode 100644 index 00000000..2ce19761 --- /dev/null +++ b/test_result/deeplearning/dl-layer-rmsnorm-benchmark.log @@ -0,0 +1,18 @@ +2025-03-30T12:13:02+00:00 +Running ./dl-layer-rmsnorm-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 1.00, 1.39, 6.06 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
+------------------------------------------------------------------------------ +Benchmark Time CPU Iterations +------------------------------------------------------------------------------ +DL_LAYER_RMSNORM/Scalar 0.002 ms 0.002 ms 356344 +DL_LAYER_RMSNORM/Auto_Vectorization 0.001 ms 0.001 ms 764783 +----------------------------------------------------------- +Correctness Verification: PASS +----------------------------------------------------------- diff --git a/test_result/deeplearning/dl-layer-selfattention-benchmark.log b/test_result/deeplearning/dl-layer-selfattention-benchmark.log new file mode 100644 index 00000000..17623060 --- /dev/null +++ b/test_result/deeplearning/dl-layer-selfattention-benchmark.log @@ -0,0 +1,18 @@ +2025-03-30T12:13:00+00:00 +Running ./dl-layer-selfattention-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 1.00, 1.39, 6.06 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
+-------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------- +DL_LAYER_ATTENTION/Scalar 4.68 ms 4.68 ms 150 +DL_LAYER_ATTENTION/Auto_Vectorization 1.57 ms 1.57 ms 455 +----------------------------------------------------------- +Correctness Verification: PASS +----------------------------------------------------------- diff --git a/test_result/deeplearning/dl-model-lenet-benchmark.log b/test_result/deeplearning/dl-model-lenet-benchmark.log new file mode 100644 index 00000000..a1ce7074 --- /dev/null +++ b/test_result/deeplearning/dl-model-lenet-benchmark.log @@ -0,0 +1,19 @@ +2025-03-30T12:09:01+00:00 +Running ./dl-model-lenet-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 1.04, 1.86, 7.56 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
+----------------------------------------------------------------------------- +Benchmark Time CPU Iterations +----------------------------------------------------------------------------- +DL_MODEL_LENET/Auto_Vectorization 0.164 ms 0.164 ms 4368 +DL_MODEL_LENET/Buddy_Vectorization 0.154 ms 0.154 ms 5094 +----------------------------------------------------------- +Correctness Verification: +Transform case: PASS +----------------------------------------------------------- diff --git a/test_result/deeplearning/dl-model-mobilenetv3-benchmark.log b/test_result/deeplearning/dl-model-mobilenetv3-benchmark.log new file mode 100644 index 00000000..8bed1b85 --- /dev/null +++ b/test_result/deeplearning/dl-model-mobilenetv3-benchmark.log @@ -0,0 +1,19 @@ +2025-03-30T12:08:59+00:00 +Running ./dl-model-mobilenetv3-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 1.04, 1.86, 7.56 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
+----------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +----------------------------------------------------------------------------------- +BM_MobileNet_V3/BM_MobileNet_V3_scalar 36.7 ms 36.7 ms 18 +BM_MobileNet_V3/BM_MobileNet_V3_conv_opt 32.6 ms 32.6 ms 22 +----------------------------------------------------------- +Correctness Verification: +Transform case: PASS +----------------------------------------------------------- diff --git a/test_result/deeplearning/dl-model-resnet18-benchmark.log b/test_result/deeplearning/dl-model-resnet18-benchmark.log new file mode 100644 index 00000000..e95722a5 --- /dev/null +++ b/test_result/deeplearning/dl-model-resnet18-benchmark.log @@ -0,0 +1,18 @@ +2025-03-30T12:12:55+00:00 +Running ./dl-model-resnet18-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 1.00, 1.39, 6.08 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
+-------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------- +DL_MODEL_Resnet18/Auto_Vectorization 723 ms 722 ms 1 +DL_MODEL_Resnet18/Buddy_Vectorization 726 ms 718 ms 1 +----------------------------------------------------------- +Correctness Verification: PASS +----------------------------------------------------------- diff --git a/test_result/deeplearning/dl-model-tinyllama-benchmark.log b/test_result/deeplearning/dl-model-tinyllama-benchmark.log new file mode 100644 index 00000000..e07df494 --- /dev/null +++ b/test_result/deeplearning/dl-model-tinyllama-benchmark.log @@ -0,0 +1,19 @@ +2025-03-30T12:03:25+00:00 +Running ./dl-model-tinyllama-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 2.75, 3.61, 10.42 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
+---------------------------------------------------------------------------- +Benchmark Time CPU Iterations +---------------------------------------------------------------------------- +DL_MODEL_TINYLLAMA/scalar 158531 ms 158516 ms 1 +DL_MODEL_TINYLLAMA/matmul_opt 9744 ms 9735 ms 1 +DL_MODEL_TINYLLAMA/matmul_opt_omp 7716 ms 7038 ms 1 +---------- Verification ---------- +matmul_opt PASS +matmul_opt_omp PASS diff --git a/test_result/deeplearning/dl-model-whisper-benchmark.log b/test_result/deeplearning/dl-model-whisper-benchmark.log new file mode 100644 index 00000000..e4534bbc --- /dev/null +++ b/test_result/deeplearning/dl-model-whisper-benchmark.log @@ -0,0 +1,19 @@ +2025-03-30T12:09:03+00:00 +Running ./dl-model-whisper-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 1.04, 1.84, 7.52 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
+------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +------------------------------------------------------------------------------- +DL_MODEL_Whisper/Auto_Vectorization 78390 ms 78388 ms 1 +DL_MODEL_Whisper/Buddy_Vectorization 36641 ms 36637 ms 1 +----------------------------------------------------------- +Correctness Verification for Output1: PASS +Correctness Verification for Output2: FAIL +----------------------------------------------------------- diff --git a/test_result/deeplearning/dl-op-linalg-arithaddf-benchmark.log b/test_result/deeplearning/dl-op-linalg-arithaddf-benchmark.log new file mode 100644 index 00000000..d89cd1e1 --- /dev/null +++ b/test_result/deeplearning/dl-op-linalg-arithaddf-benchmark.log @@ -0,0 +1,19 @@ +2025-03-30T12:13:31+00:00 +Running ./dl-op-linalg-arithaddf-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 1.00, 1.35, 5.89 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
+-------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------- +BM_ADDF_SCALAR 0.030 ms 0.030 ms 23440 +BM_ADDF_AutoVectorization 0.004 ms 0.004 ms 175032 +----------------------------------------------------------- +Correctness Verification: +Transform case: PASS +----------------------------------------------------------- diff --git a/test_result/deeplearning/dl-op-linalg-arithdivf-benchmark.log b/test_result/deeplearning/dl-op-linalg-arithdivf-benchmark.log new file mode 100644 index 00000000..02d6e568 --- /dev/null +++ b/test_result/deeplearning/dl-op-linalg-arithdivf-benchmark.log @@ -0,0 +1,19 @@ +2025-03-30T12:13:33+00:00 +Running ./dl-op-linalg-arithdivf-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 1.00, 1.34, 5.87 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
+-------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------- +BM_DIVF_SCALAR 0.029 ms 0.029 ms 23951 +BM_DIVF_AutoVectorization 0.009 ms 0.009 ms 73837 +----------------------------------------------------------- +Correctness Verification: +Transform case: PASS +----------------------------------------------------------- diff --git a/test_result/deeplearning/dl-op-linalg-arithmulf-benchmark.log b/test_result/deeplearning/dl-op-linalg-arithmulf-benchmark.log new file mode 100644 index 00000000..4fa4ffde --- /dev/null +++ b/test_result/deeplearning/dl-op-linalg-arithmulf-benchmark.log @@ -0,0 +1,19 @@ +2025-03-30T12:13:35+00:00 +Running ./dl-op-linalg-arithmulf-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 1.00, 1.34, 5.87 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
+-------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------- +BM_MULF_SCALAR 0.029 ms 0.029 ms 23549 +BM_MULF_AutoVectorization 0.004 ms 0.004 ms 174752 +----------------------------------------------------------- +Correctness Verification: +Transform case: PASS +----------------------------------------------------------- diff --git a/test_result/deeplearning/dl-op-linalg-arithnegf-benchmark.log b/test_result/deeplearning/dl-op-linalg-arithnegf-benchmark.log new file mode 100644 index 00000000..e6387a2a --- /dev/null +++ b/test_result/deeplearning/dl-op-linalg-arithnegf-benchmark.log @@ -0,0 +1,19 @@ +2025-03-30T12:13:37+00:00 +Running ./dl-op-linalg-arithnegf-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 1.00, 1.34, 5.87 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
+-------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------- +BM_NEGF_SCALAR 0.022 ms 0.022 ms 30658 +BM_NEGF_AutoVectorization 0.003 ms 0.003 ms 245490 +----------------------------------------------------------- +Correctness Verification: +Transform case: PASS +----------------------------------------------------------- diff --git a/test_result/deeplearning/dl-op-linalg-arithsubf-benchmark.log b/test_result/deeplearning/dl-op-linalg-arithsubf-benchmark.log new file mode 100644 index 00000000..3a9efa27 --- /dev/null +++ b/test_result/deeplearning/dl-op-linalg-arithsubf-benchmark.log @@ -0,0 +1,19 @@ +2025-03-30T12:13:39+00:00 +Running ./dl-op-linalg-arithsubf-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 1.00, 1.34, 5.84 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
+-------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------- +BM_SUBF_SCALAR 0.029 ms 0.029 ms 23697 +BM_SUBF_AutoVectorization 0.004 ms 0.004 ms 147910 +----------------------------------------------------------- +Correctness Verification: +Transform case: PASS +----------------------------------------------------------- diff --git a/test_result/deeplearning/dl-op-linalg-batch-matmul-benchmark.log b/test_result/deeplearning/dl-op-linalg-batch-matmul-benchmark.log new file mode 100644 index 00000000..d187e4d9 --- /dev/null +++ b/test_result/deeplearning/dl-op-linalg-batch-matmul-benchmark.log @@ -0,0 +1,25 @@ +2025-03-30T12:13:21+00:00 +Running ./dl-op-linalg-batch-matmul-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 1.00, 1.36, 5.95 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
+--------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +--------------------------------------------------------------------------------------------- +DL_OPS_BATCH_MATMUL/Scalar/iterations:1 3525 ms 3517 ms 1 +DL_OPS_BATCH_MATMUL/AutoVectorization/iterations:1 976 ms 976 ms 1 +DL_OPS_BATCH_MATMUL/Vectorization/iterations:1 189 ms 189 ms 1 +DL_OPS_BATCH_MATMUL/Tile/iterations:1 109 ms 109 ms 1 +DL_OPS_BATCH_MATMUL/SCF/iterations:1 117 ms 117 ms 1 +DL_OPS_BATCH_MATMUL/BROADCAST/iterations:1 353 ms 353 ms 1 +DL_OPS_BATCH_MATMUL/BROADCAST_OMP/iterations:1 75.4 ms 38.2 ms 1 +---------- Verification ---------- +Tile PASS +SCF PASS +BROADCAST PASS +BROADCAST_OMP PASS diff --git a/test_result/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.log b/test_result/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.log new file mode 100644 index 00000000..4e58a246 --- /dev/null +++ b/test_result/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.log @@ -0,0 +1,19 @@ +2025-03-30T12:13:15+00:00 +Running ./dl-op-linalg-conv2d-nchw-fchw-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 1.00, 1.37, 5.97 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
+------------------------------------------------------------------- +Benchmark Time CPU Iterations +------------------------------------------------------------------- +BM_Conv2DNchwFchw_SCALAR 283 ms 283 ms 2 +BM_Conv2DNchwFchw_Im2col 10.2 ms 10.2 ms 68 +----------------------------------------------------------- +Correctness Verification: +Transform case: PASS +----------------------------------------------------------- diff --git a/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.log b/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.log new file mode 100644 index 00000000..08115149 --- /dev/null +++ b/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.log @@ -0,0 +1,21 @@ +2025-03-30T12:13:18+00:00 +Running ./dl-op-linalg-conv2d-nhwc-fhwc-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 1.00, 1.36, 5.95 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
+--------------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +--------------------------------------------------------------------------------------------------- +DL_OPS_CONV_2D_NHWC_FHWC/scalar/iterations:5 73.5 ms 73.5 ms 5 +DL_OPS_CONV_2D_NHWC_FHWC/auto_vectorization/iterations:5 9.35 ms 9.35 ms 5 +DL_OPS_CONV_2D_NHWC_FHWC/vectorization/iterations:5 1.74 ms 1.74 ms 5 +DL_OPS_CONV_2D_NHWC_FHWC/vec_tile/iterations:5 1.73 ms 1.73 ms 5 +---------- Verification ---------- +auto_vectorization PASS +vectorization PASS +vec_tile PASS diff --git a/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.log b/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.log new file mode 100644 index 00000000..f09a4101 --- /dev/null +++ b/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.log @@ -0,0 +1,19 @@ +2025-03-30T12:13:17+00:00 +Running ./dl-op-linalg-conv2d-nhwc-hwcf-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 1.00, 1.37, 5.97 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
+--------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +--------------------------------------------------------------------------------- +BM_CONV_2D_NHWC_HWCF_SCALAR 32.4 ms 32.4 ms 22 +BM_CONV_2D_NHWC_HWCF_AutoVectorization 5.83 ms 5.83 ms 120 +----------------------------------------------------------- +Correctness Verification: +Transform case: PASS +----------------------------------------------------------- diff --git a/test_result/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.log b/test_result/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.log new file mode 100644 index 00000000..c761a6b6 --- /dev/null +++ b/test_result/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.log @@ -0,0 +1,19 @@ +2025-03-30T12:13:19+00:00 +Running ./dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 1.00, 1.36, 5.95 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
+------------------------------------------------------------------------------------------------------------ +Benchmark Time CPU Iterations +------------------------------------------------------------------------------------------------------------ +DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/scalar/iterations:5 6.25 ms 6.25 ms 5 +DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/auto_vectorization/iterations:5 1.71 ms 1.71 ms 5 +DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/vectorization/iterations:5 0.128 ms 0.128 ms 5 +---------- Verification ---------- +auto_vectorization PASS +vectorization PASS diff --git a/test_result/deeplearning/dl-op-linalg-mathexp-benchmark.log b/test_result/deeplearning/dl-op-linalg-mathexp-benchmark.log new file mode 100644 index 00000000..c3ecd554 --- /dev/null +++ b/test_result/deeplearning/dl-op-linalg-mathexp-benchmark.log @@ -0,0 +1,19 @@ +2025-03-30T12:13:45+00:00 +Running ./dl-op-linalg-mathexp-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 1.00, 1.33, 5.81 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
+------------------------------------------------------------------- +Benchmark Time CPU Iterations +------------------------------------------------------------------- +BM_EXP_SCALAR 0.046 ms 0.046 ms 15309 +BM_EXP_AutoVectorization 0.032 ms 0.032 ms 21998 +----------------------------------------------------------- +Correctness Verification: +Transform case: PASS +----------------------------------------------------------- diff --git a/test_result/deeplearning/dl-op-linalg-mathfpow-benchmark.log b/test_result/deeplearning/dl-op-linalg-mathfpow-benchmark.log new file mode 100644 index 00000000..018b3377 --- /dev/null +++ b/test_result/deeplearning/dl-op-linalg-mathfpow-benchmark.log @@ -0,0 +1,19 @@ +2025-03-30T12:13:42+00:00 +Running ./dl-op-linalg-mathfpow-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 1.00, 1.34, 5.84 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
+-------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------- +BM_FPOW_SCALAR 0.084 ms 0.084 ms 8347 +BM_FPOW_AutoVectorization 0.057 ms 0.057 ms 12328 +----------------------------------------------------------- +Correctness Verification: +Transform case: PASS +----------------------------------------------------------- diff --git a/test_result/deeplearning/dl-op-linalg-mathrsqrt-benchmark.log b/test_result/deeplearning/dl-op-linalg-mathrsqrt-benchmark.log new file mode 100644 index 00000000..bf045f07 --- /dev/null +++ b/test_result/deeplearning/dl-op-linalg-mathrsqrt-benchmark.log @@ -0,0 +1,19 @@ +2025-03-30T12:13:43+00:00 +Running ./dl-op-linalg-mathrsqrt-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 1.00, 1.33, 5.81 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
+--------------------------------------------------------------------- +Benchmark Time CPU Iterations +--------------------------------------------------------------------- +BM_RSQRT_SCALAR 0.073 ms 0.073 ms 9497 +BM_RSQRT_AutoVectorization 0.004 ms 0.004 ms 161025 +----------------------------------------------------------- +Correctness Verification: +Transform case: PASS +----------------------------------------------------------- diff --git a/test_result/deeplearning/dl-op-linalg-matmul-benchmark.log b/test_result/deeplearning/dl-op-linalg-matmul-benchmark.log new file mode 100644 index 00000000..412446c6 --- /dev/null +++ b/test_result/deeplearning/dl-op-linalg-matmul-benchmark.log @@ -0,0 +1,22 @@ +2025-03-30T12:13:04+00:00 +Running ./dl-op-linalg-matmul-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 1.00, 1.38, 6.03 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
+------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +------------------------------------------------------------------------------- +DL_OPS_MATMUL/scalar_O0/iterations:1 3716 ms 3716 ms 1 +DL_OPS_MATMUL/scalar_O3/iterations:1 3312 ms 3312 ms 1 +DL_OPS_MATMUL/tile/iterations:1 117 ms 117 ms 1 +DL_OPS_MATMUL/vec/iterations:1 140 ms 140 ms 1 +DL_OPS_MATMUL/vec_omp/iterations:1 20.5 ms 18.8 ms 1 +---------- Verification ---------- +tile PASS +vec PASS +vec_omp PASS diff --git a/test_result/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.log b/test_result/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.log new file mode 100644 index 00000000..f9296017 --- /dev/null +++ b/test_result/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.log @@ -0,0 +1,19 @@ +2025-03-30T12:13:19+00:00 +Running ./dl-op-linalg-pooling-nhwc-sum-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 1.00, 1.36, 5.95 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
+-------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------- +BM_POOLING_NHWC_SUM_SCALAR 0.233 ms 0.233 ms 3007 +BM_POOLING_NHWC_SUM_AutoVectorization 0.042 ms 0.042 ms 16752 +----------------------------------------------------------- +Correctness Verification: +Transform case: PASS +----------------------------------------------------------- diff --git a/test_result/deeplearning/dl-op-linalg-reduceaddf-benchmark.log b/test_result/deeplearning/dl-op-linalg-reduceaddf-benchmark.log new file mode 100644 index 00000000..1e8bcc7f --- /dev/null +++ b/test_result/deeplearning/dl-op-linalg-reduceaddf-benchmark.log @@ -0,0 +1,10 @@ +2025-03-30T12:13:47+00:00 +Running ./dl-op-linalg-reduceaddf-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 1.00, 1.33, 5.81 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. diff --git a/test_result/deeplearning/dl-op-linalg-reducemaxf-benchmark.log b/test_result/deeplearning/dl-op-linalg-reducemaxf-benchmark.log new file mode 100644 index 00000000..7ed900ff --- /dev/null +++ b/test_result/deeplearning/dl-op-linalg-reducemaxf-benchmark.log @@ -0,0 +1,10 @@ +2025-03-30T12:13:48+00:00 +Running ./dl-op-linalg-reducemaxf-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 1.08, 1.34, 5.79 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
diff --git a/test_result/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.log b/test_result/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.log new file mode 100644 index 00000000..37b85c1d --- /dev/null +++ b/test_result/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.log @@ -0,0 +1,19 @@ +2025-03-30T12:13:48+00:00 +Running ./dl-op-linalg-softmax-exp-sum-div-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 1.08, 1.34, 5.79 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------- +BM_SOFTMAXEXPSUMDIV_SCALAR 0.006 ms 0.006 ms 124261 +BM_SOFTMAXEXPSUMDIV_AutoVectorization 0.004 ms 0.004 ms 182159 +----------------------------------------------------------- +Correctness Verification: +Transform case: PASS +----------------------------------------------------------- diff --git a/test_result/deeplearning/dl-op-matmul-transpose-b-benchmark.log b/test_result/deeplearning/dl-op-matmul-transpose-b-benchmark.log new file mode 100644 index 00000000..ac6c4e30 --- /dev/null +++ b/test_result/deeplearning/dl-op-matmul-transpose-b-benchmark.log @@ -0,0 +1,21 @@ +2025-03-30T12:13:50+00:00 +Running ./dl-op-matmul-transpose-b-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 1.08, 1.34, 5.79 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
+----------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +----------------------------------------------------------------------------------------------- +DL_OPS_MATMUL_TRANSPOSE_B/scalar_O0/iterations:5 1046 ms 1044 ms 5 +DL_OPS_MATMUL_TRANSPOSE_B/scalar_O3/iterations:5 277 ms 277 ms 5 +DL_OPS_MATMUL_TRANSPOSE_B/scalar_O3_omp/iterations:5 32.4 ms 21.2 ms 5 +DL_OPS_MATMUL_TRANSPOSE_B/vec/iterations:5 84.6 ms 84.6 ms 5 +---------- Verification ---------- +scalar_O3 PASS +scalar_O3_omp PASS +vec PASS diff --git a/test_result/deeplearning/dl-op-tosa-transpose-benchmark.log b/test_result/deeplearning/dl-op-tosa-transpose-benchmark.log new file mode 100644 index 00000000..aec2390a --- /dev/null +++ b/test_result/deeplearning/dl-op-tosa-transpose-benchmark.log @@ -0,0 +1,17 @@ +2025-03-30T12:13:50+00:00 +Running ./dl-op-tosa-transpose-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 1.08, 1.34, 5.79 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
+------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +------------------------------------------------------------------------------------- +DL_OPS_TRANSPOSE_2D/scalar_O0/iterations:5 25.4 ms 20.6 ms 5 +DL_OPS_TRANSPOSE_2D/scalar_O3/iterations:5 19.2 ms 14.2 ms 5 +---------- Verification ---------- +scalar_O3 PASS diff --git a/test_result/deeplearning/run_results_summary.log b/test_result/deeplearning/run_results_summary.log new file mode 100644 index 00000000..ce1a088d --- /dev/null +++ b/test_result/deeplearning/run_results_summary.log @@ -0,0 +1,29 @@ +[Success] Run of 'dl-model-tinyllama-benchmark' +[Success] Run of 'dl-model-mobilenetv3-benchmark' +[Success] Run of 'dl-model-lenet-benchmark' +[Missing] Executable not found for 'dl-model-bert-benchmark' +[Success] Run of 'dl-model-whisper-benchmark' +[Success] Run of 'dl-model-resnet18-benchmark' +[Success] Run of 'dl-layer-ffn-benchmark' +[Success] Run of 'dl-layer-selfattention-benchmark' +[Success] Run of 'dl-layer-rmsnorm-benchmark' +[Success] Run of 'dl-op-linalg-matmul-benchmark' +[Success] Run of 'dl-op-linalg-conv2d-nchw-fchw-benchmark' +[Success] Run of 'dl-op-linalg-conv2d-nhwc-hwcf-benchmark' +[Success] Run of 'dl-op-linalg-conv2d-nhwc-fhwc-benchmark' +[Success] Run of 'dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark' +[Success] Run of 'dl-op-linalg-pooling-nhwc-sum-benchmark' +[Success] Run of 'dl-op-linalg-batch-matmul-benchmark' +[Success] Run of 'dl-op-linalg-arithaddf-benchmark' +[Success] Run of 'dl-op-linalg-arithdivf-benchmark' +[Success] Run of 'dl-op-linalg-arithmulf-benchmark' +[Success] Run of 'dl-op-linalg-arithnegf-benchmark' +[Success] Run of 'dl-op-linalg-arithsubf-benchmark' +[Success] Run of 'dl-op-linalg-mathfpow-benchmark' +[Success] Run of 'dl-op-linalg-mathrsqrt-benchmark' +[Success] Run of 'dl-op-linalg-mathexp-benchmark' +[Failed] Run of 'dl-op-linalg-reduceaddf-benchmark' +[Failed] Run of 
'dl-op-linalg-reducemaxf-benchmark' +[Success] Run of 'dl-op-linalg-softmax-exp-sum-div-benchmark' +[Success] Run of 'dl-op-tosa-transpose-benchmark' +[Success] Run of 'dl-op-matmul-transpose-b-benchmark' diff --git a/test_result/geminiprocessing/build.log b/test_result/geminiprocessing/build.log new file mode 100644 index 00000000..8473f261 --- /dev/null +++ b/test_result/geminiprocessing/build.log @@ -0,0 +1,655 @@ +[1/21] Creating directories for 'project_googlebenchmark' +[2/21] Building C object benchmarks/Gemmini/Ops/MatMulOp/CMakeFiles/ExoMatMul.dir/ExoMatmul.c.o +FAILED: benchmarks/Gemmini/Ops/MatMulOp/CMakeFiles/ExoMatMul.dir/ExoMatmul.c.o +riscv64-unknown-linux-gnu-gcc -I/home/buddy-complier-workspace/buddy-mlir/build/cmake/../../frontend/Interfaces -I/home/buddy-complier-workspace/buddy-mlir/build/cmake/../../thirdparty/include -I/home/buddy-complier-workspace/buddy-benchmark/benchmarks -I/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include -I/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/.. 
-I/home/xychen/buddy-mlir/frontend/Interfaces -O3 -DNDEBUG -MD -MT benchmarks/Gemmini/Ops/MatMulOp/CMakeFiles/ExoMatMul.dir/ExoMatmul.c.o -MF benchmarks/Gemmini/Ops/MatMulOp/CMakeFiles/ExoMatMul.dir/ExoMatmul.c.o.d -o benchmarks/Gemmini/Ops/MatMulOp/CMakeFiles/ExoMatMul.dir/ExoMatmul.c.o -c /home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c: In function '_exo_matmul_4': +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:28:47: error: macro "gemmini_extended_config_ex" requires 7 arguments, but only 6 given + 28 | gemmini_extended_config_ex(WS, 0, 0, 1, 0, 0); + | ^ +In file included from /home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:23: +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:251: note: macro "gemmini_extended_config_ex" defined here + 251 | #define gemmini_extended_config_ex(dataflow, sys_act, sys_shift, relu6_shift, A_stride, A_transpose, B_transpose) \ + | +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:28:3: error: 'gemmini_extended_config_ex' undeclared (first use in this function) + 28 | gemmini_extended_config_ex(WS, 0, 0, 1, 0, 0); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:28:3: note: each undeclared identifier is reported only once for each function it appears in +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:35:18: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast] + 35 | int32_t *res = (int32_t*) ((uint32_t)gemm_acc_malloc (16 * 16 * 4 * 4 * sizeof(int32_t))); + | ^ +In file included from 
/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:20, + from /home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:23: +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:66:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 66 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:66:9: note: in expansion of macro 'gemmini_extended_preload' + 66 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:66:119: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 66 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( 
((uint32_t)((uint64_t)b)) + ((j) * (4096))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:66:9: note: in expansion of macro 'gemmini_extended_preload' + 66 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:67:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 67 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 
'ROCC_INSTRUCTION_RS1_RS2' + 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:67:9: note: in expansion of macro 'gemmini_extended_compute_preloaded' + 67 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:68:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 68 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:68:9: note: in expansion of macro 'gemmini_extended_preload' + 68 | 
gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:68:125: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 68 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:68:9: note: in expansion of macro 'gemmini_extended_preload' + 68 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:69:44: warning: cast from pointer to integer of different size 
[-Wpointer-to-int-cast] + 69 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:69:9: note: in expansion of macro 'gemmini_extended_compute_preloaded' + 69 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:70:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 70 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ 
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:70:9: note: in expansion of macro 'gemmini_extended_preload' + 70 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:70:133: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 70 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | 
^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:70:9: note: in expansion of macro 'gemmini_extended_preload' + 70 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:71:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 71 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:71:9: note: in expansion of macro 'gemmini_extended_compute_preloaded' + 71 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:72:34: warning: cast from pointer 
to integer of different size [-Wpointer-to-int-cast] + 72 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:72:9: note: in expansion of macro 'gemmini_extended_preload' + 72 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:72:133: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 72 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ 
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:72:9: note: in expansion of macro 'gemmini_extended_preload' + 72 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:73:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 73 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | 
(uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:73:9: note: in expansion of macro 'gemmini_extended_compute_preloaded' + 73 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:74:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 74 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:74:9: note: in expansion of macro 'gemmini_extended_preload' + 74 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 
0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:74:126: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 74 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:74:9: note: in expansion of macro 'gemmini_extended_preload' + 74 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:75:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 75 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + 256)/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^ 
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:75:9: note: in expansion of macro 'gemmini_extended_compute_preloaded' + 75 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + 256)/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:76:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 76 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | 
((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:76:9: note: in expansion of macro 'gemmini_extended_preload' + 76 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:76:132: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 76 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:76:9: note: in expansion of macro 'gemmini_extended_preload' + 76 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) 
* (4096) + 1024 + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:77:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 77 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + 256)/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:77:9: note: in expansion of macro 'gemmini_extended_compute_preloaded' + 77 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + 256)/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:78:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 78 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 
(2) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:78:9: note: in expansion of macro 'gemmini_extended_preload' + 78 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:78:140: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 78 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ 
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:78:9: note: in expansion of macro 'gemmini_extended_preload' + 78 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:79:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 79 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + 256)/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED) + | ^~~~~~~~~~~~~~~~~~~~~~~~ 
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:79:9: note: in expansion of macro 'gemmini_extended_compute_preloaded' + 79 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + 256)/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:80:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 80 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:80:9: note: in expansion of macro 'gemmini_extended_preload' + 80 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ 
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:80:140: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 80 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:80:9: note: in expansion of macro 'gemmini_extended_preload' + 80 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:81:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 81 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + 256)/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^ 
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:81:9: note: in expansion of macro 'gemmini_extended_compute_preloaded' + 81 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + 256)/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:82:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 82 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | 
((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:82:9: note: in expansion of macro 'gemmini_extended_preload' + 82 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:82:134: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 82 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:82:9: note: in expansion of macro 'gemmini_extended_preload' + 82 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * 
(4096) + (2) * (1024))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:83:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 83 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (2) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:83:9: note: in expansion of macro 'gemmini_extended_compute_preloaded' + 83 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (2) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:84:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 84 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * 
(1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:84:9: note: in expansion of macro 'gemmini_extended_preload' + 84 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:84:140: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 84 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in 
expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:84:9: note: in expansion of macro 'gemmini_extended_preload' + 84 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:85:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 85 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (2) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:85:9: note: in expansion of macro 'gemmini_extended_compute_preloaded' + 85 | 
gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (2) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:86:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 86 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:86:9: note: in expansion of macro 'gemmini_extended_preload' + 86 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:86:148: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] 
+ 86 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:86:9: note: in expansion of macro 'gemmini_extended_preload' + 86 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:87:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 87 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (2) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : 
"r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:87:9: note: in expansion of macro 'gemmini_extended_compute_preloaded' + 87 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (2) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:88:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 88 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | 
^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:88:9: note: in expansion of macro 'gemmini_extended_preload' + 88 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:88:148: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 88 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:88:9: note: in expansion of macro 'gemmini_extended_preload' + 88 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) 
+ ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:89:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 89 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (2) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:89:9: note: in expansion of macro 'gemmini_extended_compute_preloaded' + 89 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (2) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:90:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 90 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ 
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:90:9: note: in expansion of macro 'gemmini_extended_preload' + 90 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:90:134: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 90 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, 
((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:90:9: note: in expansion of macro 'gemmini_extended_preload' + 90 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:91:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 91 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (3) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:91:9: note: in expansion of macro 'gemmini_extended_compute_preloaded' + 91 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (3) 
* (256))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:92:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 92 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:92:9: note: in expansion of macro 'gemmini_extended_preload' + 92 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:92:140: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 92 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + 256)/16))), 
(uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:92:9: note: in expansion of macro 'gemmini_extended_preload' + 92 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:93:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 93 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (3) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 
'ROCC_INSTRUCTION_RS1_RS2' + 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:93:9: note: in expansion of macro 'gemmini_extended_compute_preloaded' + 93 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (3) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:94:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 94 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:94:9: note: in expansion of macro 'gemmini_extended_preload' + 94 | 
gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:94:148: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 94 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:94:9: note: in expansion of macro 'gemmini_extended_preload' + 94 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ 
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:95:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 95 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (3) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:95:9: note: in expansion of macro 'gemmini_extended_compute_preloaded' + 95 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (3) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:96:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 96 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ 
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:96:9: note: in expansion of macro 'gemmini_extended_preload' + 96 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:96:148: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 96 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 
'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:96:9: note: in expansion of macro 'gemmini_extended_preload' + 96 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:97:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 97 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (3) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:97:9: note: in expansion of macro 'gemmini_extended_compute_preloaded' + 97 | 
gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (3) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:98:89: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 98 | gemmini_extended_mvout( ((uint64_t) &C[(16 * i + 3136 * io) * (256) + 64 * j]), (uint32_t) &*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16)), (16), (16) ); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:212:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 212 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, dram_addr, ((uint64_t)(rows) << (ADDR_LEN + 16)) | ((uint64_t)(cols) << ADDR_LEN) | (uint64_t)(spad_addr), k_MVOUT) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:98:9: note: in expansion of macro 'gemmini_extended_mvout' + 98 | gemmini_extended_mvout( ((uint64_t) &C[(16 * i + 3136 * io) * (256) + 64 * j]), (uint32_t) &*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16)), (16), (16) ); + | ^~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:99:94: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 99 | gemmini_extended_mvout( ((uint64_t) &C[(16 * i + 3136 * io) * (256) + 16 + 64 * j]), (uint32_t) &*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16)), (16), (16) ); + | ^ 
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:212:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 212 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, dram_addr, ((uint64_t)(rows) << (ADDR_LEN + 16)) | ((uint64_t)(cols) << ADDR_LEN) | (uint64_t)(spad_addr), k_MVOUT) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:99:9: note: in expansion of macro 'gemmini_extended_mvout' + 99 | gemmini_extended_mvout( ((uint64_t) &C[(16 * i + 3136 * io) * (256) + 16 + 64 * j]), (uint32_t) &*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16)), (16), (16) ); + | ^~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:100:94: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 100 | gemmini_extended_mvout( ((uint64_t) &C[(16 * i + 3136 * io) * (256) + 32 + 64 * j]), (uint32_t) &*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16)), (16), (16) ); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:212:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 212 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, dram_addr, ((uint64_t)(rows) << (ADDR_LEN + 16)) | ((uint64_t)(cols) << ADDR_LEN) | (uint64_t)(spad_addr), k_MVOUT) + | ^~~~~~~~~~~~~~~~~~~~~~~~ 
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:100:9: note: in expansion of macro 'gemmini_extended_mvout' + 100 | gemmini_extended_mvout( ((uint64_t) &C[(16 * i + 3136 * io) * (256) + 32 + 64 * j]), (uint32_t) &*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16)), (16), (16) ); + | ^~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:101:94: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 101 | gemmini_extended_mvout( ((uint64_t) &C[(16 * i + 3136 * io) * (256) + 48 + 64 * j]), (uint32_t) &*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16)), (16), (16) ); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:212:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 212 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, dram_addr, ((uint64_t)(rows) << (ADDR_LEN + 16)) | ((uint64_t)(cols) << ADDR_LEN) | (uint64_t)(spad_addr), k_MVOUT) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:101:9: note: in expansion of macro 'gemmini_extended_mvout' + 101 | gemmini_extended_mvout( ((uint64_t) &C[(16 * i + 3136 * io) * (256) + 48 + 64 * j]), (uint32_t) &*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16)), (16), (16) ); + | ^~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:105:17: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 105 | gemm_acc_free((uint32_t)(res)); + | ^ +[3/21] 
Generating buddy_matmul.o +[4/21] Building CXX object benchmarks/Gemmini/ResNet-101/CMakeFiles/CRunnerUtils.dir/CRunnerUtils.cpp.o +[5/21] Performing download step (git clone) for 'project_googlebenchmark' +Cloning into 'project_googlebenchmark'... +HEAD is now at f91b6b4 bump version to 1.6 in preparation for release +[6/21] Generating resnet-101.o +ninja: build stopped: subcommand failed. diff --git a/test_result/geminiprocessing/cmake_configure.log b/test_result/geminiprocessing/cmake_configure.log new file mode 100644 index 00000000..a3a42f37 --- /dev/null +++ b/test_result/geminiprocessing/cmake_configure.log @@ -0,0 +1,37 @@ +-- The CXX compiler identification is GNU 9.2.0 +-- The C compiler identification is GNU 9.2.0 +-- Detecting CXX compiler ABI info +-- Detecting CXX compiler ABI info - done +-- Check for working CXX compiler: /home/buddy-complier-workspace/chipyard/.conda-env/esp-tools/bin/riscv64-unknown-linux-gnu-g++ - skipped +-- Detecting CXX compile features +-- Detecting CXX compile features - done +-- Detecting C compiler ABI info +-- Detecting C compiler ABI info - done +-- Check for working C compiler: /home/buddy-complier-workspace/chipyard/.conda-env/esp-tools/bin/riscv64-unknown-linux-gnu-gcc - skipped +-- Detecting C compile features +-- Detecting C compile features - done +-- Configuring Target Architecture: avx512f +-- Configuring Target Triple: x86_64-unknown-linux-gnu +-- Configuring benchmarks: google +-- Performing Test CMAKE_HAVE_LIBC_PTHREAD +-- Performing Test CMAKE_HAVE_LIBC_PTHREAD - Failed +-- Looking for pthread_create in pthreads +-- Looking for pthread_create in pthreads - not found +-- Looking for pthread_create in pthread +-- Looking for pthread_create in pthread - found +-- Found Threads: TRUE +-- Performing Test HAVE_SSE +-- Performing Test HAVE_SSE - Failed +-- SSE support - no +-- Performing Test HAVE_AVX2 +-- Performing Test HAVE_AVX2 - Failed +-- AVX2 support - no +-- Performing Test HAVE_AVX512 +-- Performing Test 
HAVE_AVX512 - Failed +-- AVX512 support - no +-- Performing Test HAVE_NEON +-- Performing Test HAVE_NEON - Failed +-- Arm Neon support - no +-- Configuring done +-- Generating done +-- Build files have been written to: /home/buddy-complier-workspace/buddy-benchmark/build From e4b813335c52f39e4b28832b3da1897888ba85d4 Mon Sep 17 00:00:00 2001 From: Q Liu <52538137+LIUQyou@users.noreply.github.com> Date: Sun, 25 May 2025 19:36:43 +0200 Subject: [PATCH 02/52] ci test --- .github/workflow/bench.yml | 70 ++++++ scripts/logs2html.py | 17 ++ scripts/run_docker.sh | 25 +++ test/test_script_vectorizationprocessing.sh | 13 ++ .../build_results_crosscompile_summary.log | 38 ++-- .../deeplearning/dl-layer-ffn-benchmark.log | 8 +- .../dl-layer-rmsnorm-benchmark.log | 8 +- .../dl-layer-selfattention-benchmark.log | 6 +- .../deeplearning/dl-model-lenet-benchmark.log | 8 +- .../dl-model-mobilenetv3-benchmark.log | 8 +- .../dl-model-resnet18-benchmark.log | 8 +- .../dl-model-tinyllama-benchmark.log | 10 +- .../dl-model-whisper-benchmark.log | 8 +- .../dl-op-linalg-arithaddf-benchmark.log | 8 +- .../dl-op-linalg-arithdivf-benchmark.log | 8 +- .../dl-op-linalg-arithmulf-benchmark.log | 8 +- .../dl-op-linalg-arithnegf-benchmark.log | 8 +- .../dl-op-linalg-arithsubf-benchmark.log | 8 +- .../dl-op-linalg-batch-matmul-benchmark.log | 14 +- ...l-op-linalg-conv2d-nchw-fchw-benchmark.log | 8 +- ...l-op-linalg-conv2d-nhwc-fhwc-benchmark.log | 8 +- ...l-op-linalg-conv2d-nhwc-hwcf-benchmark.log | 8 +- ...g-depthwise-conv-2d-nhwc-hwc-benchmark.log | 10 +- .../dl-op-linalg-mathexp-benchmark.log | 8 +- .../dl-op-linalg-mathfpow-benchmark.log | 8 +- .../dl-op-linalg-mathrsqrt-benchmark.log | 8 +- .../dl-op-linalg-matmul-benchmark.log | 14 +- ...l-op-linalg-pooling-nhwc-sum-benchmark.log | 8 +- .../dl-op-linalg-reduceaddf-benchmark.log | 4 +- .../dl-op-linalg-reducemaxf-benchmark.log | 4 +- ...p-linalg-softmax-exp-sum-div-benchmark.log | 8 +- .../dl-op-matmul-transpose-b-benchmark.log | 12 +- 
.../dl-op-tosa-transpose-benchmark.log | 8 +- .../vectorization/vectorization_result.log | 202 ++++++++++++++++++ 34 files changed, 466 insertions(+), 133 deletions(-) create mode 100644 .github/workflow/bench.yml create mode 100644 scripts/logs2html.py create mode 100644 scripts/run_docker.sh create mode 100644 test_result/vectorization/vectorization_result.log diff --git a/.github/workflow/bench.yml b/.github/workflow/bench.yml new file mode 100644 index 00000000..5c5f9d5b --- /dev/null +++ b/.github/workflow/bench.yml @@ -0,0 +1,70 @@ +name: Buddy-Benchmark CI + +on: + push: + branches: [main] # or whatever branch should trigger the run + pull_request: + +jobs: + bench: + runs-on: self-hosted # your own actions-runner machine + permissions: + contents: read + pages: write + id-token: write # required by Pages deploy + + steps: + # ------------------------------------------------------------ + # 1) check out the two source trees side-by-side + # ------------------------------------------------------------ + - uses: actions/checkout@v4 + with: {path: buddy-benchmark} + + - uses: actions/checkout@v4 + with: + repository: BuddyCompiler/buddy-mlir # adjust if fork + path: buddy-mlir + + # ------------------------------------------------------------ + # 2) run everything in the Docker sandbox + # ------------------------------------------------------------ + - name: Build & run benchmarks in container + working-directory: buddy-benchmark + run: | + chmod +x scripts/run_docker.sh + scripts/run_docker.sh + + # after the script we have ./test_result in the workspace --------- + + # ------------------------------------------------------------ + # 3) keep a raw download for debugging + # ------------------------------------------------------------ + - name: Upload raw logs as artifact + uses: actions/upload-artifact@v4 + with: + name: vectorization-logs-${{ github.sha }} + path: buddy-benchmark/test_result + retention-days: 30 + + # 
------------------------------------------------------------ + # 4) convert *.log → HTML + # ------------------------------------------------------------ + - name: Build mini-site + working-directory: buddy-benchmark + run: | + python scripts/logs2html.py test_result site + + # ------------------------------------------------------------ + # 5) publish the site to GitHub Pages + # ------------------------------------------------------------ + - name: Upload site artifact + uses: actions/upload-pages-artifact@v3 + with: + path: buddy-benchmark/site + + - name: Deploy to Pages + id: deploy + uses: actions/deploy-pages@v4 + + outputs: + page_url: ${{ steps.deploy.outputs.page_url }} diff --git a/scripts/logs2html.py b/scripts/logs2html.py new file mode 100644 index 00000000..f96eab99 --- /dev/null +++ b/scripts/logs2html.py @@ -0,0 +1,17 @@ +#!/usr/bin/env python3 +"""Turn every *.log under into /.html + an index.html.""" +import html, pathlib, datetime, sys + +src, dst = map(pathlib.Path, sys.argv[1:3]) +dst.mkdir(parents=True, exist_ok=True) +stamp = datetime.datetime.utcnow().isoformat(' ', 'seconds') + +for log in src.rglob("*.log"): + rel = log.relative_to(src) + page = dst / rel.with_suffix(".html") + page.parent.mkdir(parents=True, exist_ok=True) + page.write_text(f"

{rel}

{stamp} UTC

{html.escape(log.read_text())}
") + +links = "\n".join(f'
  • {p.as_posix()}
  • ' + for p in sorted(dst.rglob("*.html")) if p.name != "index.html") +(dst / "index.html").write_text(f"

    Buddy-Benchmark results

      {links}
    ") diff --git a/scripts/run_docker.sh b/scripts/run_docker.sh new file mode 100644 index 00000000..f7962c77 --- /dev/null +++ b/scripts/run_docker.sh @@ -0,0 +1,25 @@ +#!/usr/bin/env bash +set -e + +# ➊ one container per run, killed automatically on exit +CID=$(docker run -d --name buddy-mlir-ci-test \ + --privileged \ + -v "${GITHUB_WORKSPACE}:/home/buddy-complier-workspace" \ + liuqun1006/buddycompiler-base:python sleep infinity) + +trap "docker rm -f ${CID}" EXIT + +# ➋ execute the whole build-and-test sequence inside +docker exec "${CID}" bash -lc ' + set -e + cd /home/buddy-complier-workspace/buddy-mlir + ./test.sh build-llvm + ./test.sh build-buddy + ./test.sh run + + cd /home/buddy-complier-workspace/buddy-benchmark/test + ./test_script_vectorizationprocessing.sh +' + +# ➌ bring the logs back to the host (under ./test_result) +docker cp "${CID}":/home/buddy-complier-workspace/buddy-benchmark/test_result ./test_result diff --git a/test/test_script_vectorizationprocessing.sh b/test/test_script_vectorizationprocessing.sh index 97899922..4fccc193 100755 --- a/test/test_script_vectorizationprocessing.sh +++ b/test/test_script_vectorizationprocessing.sh @@ -1,11 +1,23 @@ #!/usr/bin/env bash +apt update +apt install -y libc6-riscv64-cross +apt install -y \ + libc6-riscv64-cross \ + libstdc++6-riscv64-cross \ + libgcc-s1-riscv64-cross ################################################################################ # 1. 
Script Setup ################################################################################ set -e BUDDY_MLIR_BUILD_DIR="/home/buddy-complier-workspace/buddy-mlir/build" LLVM_MLIR_BUILD_DIR="/home/buddy-complier-workspace/buddy-mlir/llvm/build" +# Export environment variables: +PYTHONPATH="${LLVM_MLIR_BUILD_DIR}/tools/mlir/python_packages/mlir_core:${BUDDY_MLIR_BUILD_DIR}/python_packages:${PYTHONPATH}" +BUDDY_MLIR_BUILD_CROSS_DIR=${BUDDY_MLIR_BUILD_DIR}/../build +RISCV_GNU_TOOLCHAIN=${BUDDY_MLIR_BUILD_DIR}/../thirdparty/riscv-gnu-toolchain +RISCV_OMP_SHARED=${LLVM_MLIR_BUILD_DIR}/../build/lib/libomp.so +BENCHMARK_PATH="${BUDDY_MLIR_DIR}/../buddy-benchmark" echo "[Info] BUDDY_MLIR_BUILD_DIR = ${BUDDY_MLIR_BUILD_DIR}" echo "[Info] LLVM_MLIR_BUILD_DIR = ${LLVM_MLIR_BUILD_DIR}" @@ -28,6 +40,7 @@ cmake -G Ninja .. \ echo "[Info] Building vectorization-matrix-benchmark..." | tee -a "${LOG_FILE}" ninja vectorization-matrix-benchmark 2>&1 | tee -a "${LOG_FILE}" +export QEMU_LD_PREFIX=/usr/riscv64-linux-gnu ################################################################################ # 3. 
Run Benchmark ################################################################################ diff --git a/test_result/deeplearning/build_results_crosscompile_summary.log b/test_result/deeplearning/build_results_crosscompile_summary.log index df608647..d76da098 100644 --- a/test_result/deeplearning/build_results_crosscompile_summary.log +++ b/test_result/deeplearning/build_results_crosscompile_summary.log @@ -1,23 +1,29 @@ [Failed] Build of 'dl-model-tinyllama-benchmark' [Failed] Build of 'dl-model-mobilenetv3-benchmark' -[Failed] Build of 'dl-model-lenet-benchmark' +[Success] Build of 'dl-model-lenet-benchmark' [Failed] Build of 'dl-model-bert-benchmark' [Failed] Build of 'dl-model-whisper-benchmark' [Failed] Build of 'dl-model-resnet18-benchmark' -[Failed] Build of 'dl-layer-ffn-benchmark' -[Failed] Build of 'dl-layer-selfattention-benchmark' -[Failed] Build of 'dl-layer-rmsnorm-benchmark' +[Success] Build of 'dl-layer-ffn-benchmark' +[Success] Build of 'dl-layer-selfattention-benchmark' +[Success] Build of 'dl-layer-rmsnorm-benchmark' [Failed] Build of 'dl-op-linalg-matmul-benchmark' -[Failed] Build of 'dl-op-linalg-conv2d-nchw-fchw-benchmark' -[Failed] Build of 'dl-op-linalg-conv2d-nhwc-hwcf-benchmark' -[Failed] Build of 'dl-op-linalg-conv2d-nhwc-fhwc-benchmark' -[Failed] Build of 'dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark' -[Failed] Build of 'dl-op-linalg-pooling-nhwc-sum-benchmark' +[Success] Build of 'dl-op-linalg-conv2d-nchw-fchw-benchmark' +[Success] Build of 'dl-op-linalg-conv2d-nhwc-hwcf-benchmark' +[Success] Build of 'dl-op-linalg-conv2d-nhwc-fhwc-benchmark' +[Success] Build of 'dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark' +[Success] Build of 'dl-op-linalg-pooling-nhwc-sum-benchmark' [Failed] Build of 'dl-op-linalg-batch-matmul-benchmark' -[Failed] Build of 'dl-op-linalg-arithaddf-benchmark' -[Failed] Build of 'dl-op-linalg-arithdivf-benchmark' -[Failed] Build of 'dl-op-linalg-arithmulf-benchmark' -[Failed] Build of 
'dl-op-linalg-arithnegf-benchmark' -[Failed] Build of 'dl-op-linalg-arithsubf-benchmark' -[Failed] Build of 'dl-op-linalg-mathfpow-benchmark' -[Failed] Build of 'dl-op-linalg-mathrsqrt-benchmark' +[Success] Build of 'dl-op-linalg-arithaddf-benchmark' +[Success] Build of 'dl-op-linalg-arithdivf-benchmark' +[Success] Build of 'dl-op-linalg-arithmulf-benchmark' +[Success] Build of 'dl-op-linalg-arithnegf-benchmark' +[Success] Build of 'dl-op-linalg-arithsubf-benchmark' +[Success] Build of 'dl-op-linalg-mathfpow-benchmark' +[Success] Build of 'dl-op-linalg-mathrsqrt-benchmark' +[Success] Build of 'dl-op-linalg-mathexp-benchmark' +[Success] Build of 'dl-op-linalg-reduceaddf-benchmark' +[Success] Build of 'dl-op-linalg-reducemaxf-benchmark' +[Success] Build of 'dl-op-linalg-softmax-exp-sum-div-benchmark' +[Failed] Build of 'dl-op-tosa-transpose-benchmark' +[Failed] Build of 'dl-op-matmul-transpose-b-benchmark' diff --git a/test_result/deeplearning/dl-layer-ffn-benchmark.log b/test_result/deeplearning/dl-layer-ffn-benchmark.log index 677ea1bb..186cd636 100644 --- a/test_result/deeplearning/dl-layer-ffn-benchmark.log +++ b/test_result/deeplearning/dl-layer-ffn-benchmark.log @@ -1,4 +1,4 @@ -2025-03-30T12:12:58+00:00 +2025-05-25T16:33:30+00:00 Running ./dl-layer-ffn-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.39, 6.06 +Load Average: 1.00, 1.14, 3.58 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
-------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------- -DL_LAYER_FFN/Scalar 0.065 ms 0.065 ms 10641 -DL_LAYER_FFN/Auto_Vectorization 0.027 ms 0.027 ms 26024 +DL_LAYER_FFN/Scalar 0.065 ms 0.065 ms 10714 +DL_LAYER_FFN/Auto_Vectorization 0.027 ms 0.027 ms 25753 ----------------------------------------------------------- Correctness Verification: PASS ----------------------------------------------------------- diff --git a/test_result/deeplearning/dl-layer-rmsnorm-benchmark.log b/test_result/deeplearning/dl-layer-rmsnorm-benchmark.log index 2ce19761..351d605f 100644 --- a/test_result/deeplearning/dl-layer-rmsnorm-benchmark.log +++ b/test_result/deeplearning/dl-layer-rmsnorm-benchmark.log @@ -1,4 +1,4 @@ -2025-03-30T12:13:02+00:00 +2025-05-25T16:33:34+00:00 Running ./dl-layer-rmsnorm-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.39, 6.06 +Load Average: 1.00, 1.13, 3.57 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
------------------------------------------------------------------------------ Benchmark Time CPU Iterations ------------------------------------------------------------------------------ -DL_LAYER_RMSNORM/Scalar 0.002 ms 0.002 ms 356344 -DL_LAYER_RMSNORM/Auto_Vectorization 0.001 ms 0.001 ms 764783 +DL_LAYER_RMSNORM/Scalar 0.002 ms 0.002 ms 360260 +DL_LAYER_RMSNORM/Auto_Vectorization 0.001 ms 0.001 ms 748474 ----------------------------------------------------------- Correctness Verification: PASS ----------------------------------------------------------- diff --git a/test_result/deeplearning/dl-layer-selfattention-benchmark.log b/test_result/deeplearning/dl-layer-selfattention-benchmark.log index 17623060..14874ba9 100644 --- a/test_result/deeplearning/dl-layer-selfattention-benchmark.log +++ b/test_result/deeplearning/dl-layer-selfattention-benchmark.log @@ -1,4 +1,4 @@ -2025-03-30T12:13:00+00:00 +2025-05-25T16:33:32+00:00 Running ./dl-layer-selfattention-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.39, 6.06 +Load Average: 1.00, 1.14, 3.58 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
-------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------- DL_LAYER_ATTENTION/Scalar 4.68 ms 4.68 ms 150 -DL_LAYER_ATTENTION/Auto_Vectorization 1.57 ms 1.57 ms 455 +DL_LAYER_ATTENTION/Auto_Vectorization 1.57 ms 1.57 ms 446 ----------------------------------------------------------- Correctness Verification: PASS ----------------------------------------------------------- diff --git a/test_result/deeplearning/dl-model-lenet-benchmark.log b/test_result/deeplearning/dl-model-lenet-benchmark.log index a1ce7074..f2c5402a 100644 --- a/test_result/deeplearning/dl-model-lenet-benchmark.log +++ b/test_result/deeplearning/dl-model-lenet-benchmark.log @@ -1,4 +1,4 @@ -2025-03-30T12:09:01+00:00 +2025-05-25T16:29:36+00:00 Running ./dl-model-lenet-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.04, 1.86, 7.56 +Load Average: 1.00, 1.32, 4.34 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
----------------------------------------------------------------------------- Benchmark Time CPU Iterations ----------------------------------------------------------------------------- -DL_MODEL_LENET/Auto_Vectorization 0.164 ms 0.164 ms 4368 -DL_MODEL_LENET/Buddy_Vectorization 0.154 ms 0.154 ms 5094 +DL_MODEL_LENET/Auto_Vectorization 0.152 ms 0.152 ms 4530 +DL_MODEL_LENET/Buddy_Vectorization 0.136 ms 0.136 ms 5149 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/test_result/deeplearning/dl-model-mobilenetv3-benchmark.log b/test_result/deeplearning/dl-model-mobilenetv3-benchmark.log index 8bed1b85..075a009a 100644 --- a/test_result/deeplearning/dl-model-mobilenetv3-benchmark.log +++ b/test_result/deeplearning/dl-model-mobilenetv3-benchmark.log @@ -1,4 +1,4 @@ -2025-03-30T12:08:59+00:00 +2025-05-25T16:29:34+00:00 Running ./dl-model-mobilenetv3-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.04, 1.86, 7.56 +Load Average: 1.00, 1.32, 4.34 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
----------------------------------------------------------------------------------- Benchmark Time CPU Iterations ----------------------------------------------------------------------------------- -BM_MobileNet_V3/BM_MobileNet_V3_scalar 36.7 ms 36.7 ms 18 -BM_MobileNet_V3/BM_MobileNet_V3_conv_opt 32.6 ms 32.6 ms 22 +BM_MobileNet_V3/BM_MobileNet_V3_scalar 35.1 ms 35.1 ms 20 +BM_MobileNet_V3/BM_MobileNet_V3_conv_opt 32.0 ms 32.0 ms 22 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/test_result/deeplearning/dl-model-resnet18-benchmark.log b/test_result/deeplearning/dl-model-resnet18-benchmark.log index e95722a5..97b71868 100644 --- a/test_result/deeplearning/dl-model-resnet18-benchmark.log +++ b/test_result/deeplearning/dl-model-resnet18-benchmark.log @@ -1,4 +1,4 @@ -2025-03-30T12:12:55+00:00 +2025-05-25T16:33:27+00:00 Running ./dl-model-resnet18-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.39, 6.08 +Load Average: 1.00, 1.14, 3.59 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
-------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------- -DL_MODEL_Resnet18/Auto_Vectorization 723 ms 722 ms 1 -DL_MODEL_Resnet18/Buddy_Vectorization 726 ms 718 ms 1 +DL_MODEL_Resnet18/Auto_Vectorization 720 ms 720 ms 1 +DL_MODEL_Resnet18/Buddy_Vectorization 719 ms 719 ms 1 ----------------------------------------------------------- Correctness Verification: PASS ----------------------------------------------------------- diff --git a/test_result/deeplearning/dl-model-tinyllama-benchmark.log b/test_result/deeplearning/dl-model-tinyllama-benchmark.log index e07df494..9054af0a 100644 --- a/test_result/deeplearning/dl-model-tinyllama-benchmark.log +++ b/test_result/deeplearning/dl-model-tinyllama-benchmark.log @@ -1,4 +1,4 @@ -2025-03-30T12:03:25+00:00 +2025-05-25T16:24:01+00:00 Running ./dl-model-tinyllama-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,14 +6,14 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 2.75, 3.61, 10.42 +Load Average: 1.79, 2.00, 5.81 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
---------------------------------------------------------------------------- Benchmark Time CPU Iterations ---------------------------------------------------------------------------- -DL_MODEL_TINYLLAMA/scalar 158531 ms 158516 ms 1 -DL_MODEL_TINYLLAMA/matmul_opt 9744 ms 9735 ms 1 -DL_MODEL_TINYLLAMA/matmul_opt_omp 7716 ms 7038 ms 1 +DL_MODEL_TINYLLAMA/scalar 160502 ms 160495 ms 1 +DL_MODEL_TINYLLAMA/matmul_opt 9595 ms 9595 ms 1 +DL_MODEL_TINYLLAMA/matmul_opt_omp 7607 ms 6928 ms 1 ---------- Verification ---------- matmul_opt PASS matmul_opt_omp PASS diff --git a/test_result/deeplearning/dl-model-whisper-benchmark.log b/test_result/deeplearning/dl-model-whisper-benchmark.log index e4534bbc..74e917e7 100644 --- a/test_result/deeplearning/dl-model-whisper-benchmark.log +++ b/test_result/deeplearning/dl-model-whisper-benchmark.log @@ -1,4 +1,4 @@ -2025-03-30T12:09:03+00:00 +2025-05-25T16:29:38+00:00 Running ./dl-model-whisper-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.04, 1.84, 7.52 +Load Average: 1.00, 1.31, 4.32 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
------------------------------------------------------------------------------- Benchmark Time CPU Iterations ------------------------------------------------------------------------------- -DL_MODEL_Whisper/Auto_Vectorization 78390 ms 78388 ms 1 -DL_MODEL_Whisper/Buddy_Vectorization 36641 ms 36637 ms 1 +DL_MODEL_Whisper/Auto_Vectorization 77089 ms 77086 ms 1 +DL_MODEL_Whisper/Buddy_Vectorization 35954 ms 35953 ms 1 ----------------------------------------------------------- Correctness Verification for Output1: PASS Correctness Verification for Output2: FAIL diff --git a/test_result/deeplearning/dl-op-linalg-arithaddf-benchmark.log b/test_result/deeplearning/dl-op-linalg-arithaddf-benchmark.log index d89cd1e1..3561bcf9 100644 --- a/test_result/deeplearning/dl-op-linalg-arithaddf-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-arithaddf-benchmark.log @@ -1,4 +1,4 @@ -2025-03-30T12:13:31+00:00 +2025-05-25T16:34:03+00:00 Running ./dl-op-linalg-arithaddf-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.35, 5.89 +Load Average: 1.00, 1.12, 3.48 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
-------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------- -BM_ADDF_SCALAR 0.030 ms 0.030 ms 23440 -BM_ADDF_AutoVectorization 0.004 ms 0.004 ms 175032 +BM_ADDF_SCALAR 0.030 ms 0.030 ms 23576 +BM_ADDF_AutoVectorization 0.004 ms 0.004 ms 174965 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/test_result/deeplearning/dl-op-linalg-arithdivf-benchmark.log b/test_result/deeplearning/dl-op-linalg-arithdivf-benchmark.log index 02d6e568..94f4277b 100644 --- a/test_result/deeplearning/dl-op-linalg-arithdivf-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-arithdivf-benchmark.log @@ -1,4 +1,4 @@ -2025-03-30T12:13:33+00:00 +2025-05-25T16:34:05+00:00 Running ./dl-op-linalg-arithdivf-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.34, 5.87 +Load Average: 1.00, 1.12, 3.48 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
-------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------- -BM_DIVF_SCALAR 0.029 ms 0.029 ms 23951 -BM_DIVF_AutoVectorization 0.009 ms 0.009 ms 73837 +BM_DIVF_SCALAR 0.030 ms 0.030 ms 23149 +BM_DIVF_AutoVectorization 0.009 ms 0.009 ms 73790 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/test_result/deeplearning/dl-op-linalg-arithmulf-benchmark.log b/test_result/deeplearning/dl-op-linalg-arithmulf-benchmark.log index 4fa4ffde..7eb04e7f 100644 --- a/test_result/deeplearning/dl-op-linalg-arithmulf-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-arithmulf-benchmark.log @@ -1,4 +1,4 @@ -2025-03-30T12:13:35+00:00 +2025-05-25T16:34:07+00:00 Running ./dl-op-linalg-arithmulf-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.34, 5.87 +Load Average: 1.00, 1.12, 3.48 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
-------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------- -BM_MULF_SCALAR 0.029 ms 0.029 ms 23549 -BM_MULF_AutoVectorization 0.004 ms 0.004 ms 174752 +BM_MULF_SCALAR 0.030 ms 0.030 ms 23959 +BM_MULF_AutoVectorization 0.004 ms 0.004 ms 175122 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/test_result/deeplearning/dl-op-linalg-arithnegf-benchmark.log b/test_result/deeplearning/dl-op-linalg-arithnegf-benchmark.log index e6387a2a..fc8e6962 100644 --- a/test_result/deeplearning/dl-op-linalg-arithnegf-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-arithnegf-benchmark.log @@ -1,4 +1,4 @@ -2025-03-30T12:13:37+00:00 +2025-05-25T16:34:09+00:00 Running ./dl-op-linalg-arithnegf-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.34, 5.87 +Load Average: 1.00, 1.12, 3.47 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
-------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------- -BM_NEGF_SCALAR 0.022 ms 0.022 ms 30658 -BM_NEGF_AutoVectorization 0.003 ms 0.003 ms 245490 +BM_NEGF_SCALAR 0.023 ms 0.023 ms 30704 +BM_NEGF_AutoVectorization 0.003 ms 0.003 ms 212512 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/test_result/deeplearning/dl-op-linalg-arithsubf-benchmark.log b/test_result/deeplearning/dl-op-linalg-arithsubf-benchmark.log index 3a9efa27..6d9c797c 100644 --- a/test_result/deeplearning/dl-op-linalg-arithsubf-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-arithsubf-benchmark.log @@ -1,4 +1,4 @@ -2025-03-30T12:13:39+00:00 +2025-05-25T16:34:11+00:00 Running ./dl-op-linalg-arithsubf-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.34, 5.84 +Load Average: 1.00, 1.12, 3.47 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
-------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------- -BM_SUBF_SCALAR 0.029 ms 0.029 ms 23697 -BM_SUBF_AutoVectorization 0.004 ms 0.004 ms 147910 +BM_SUBF_SCALAR 0.030 ms 0.030 ms 23752 +BM_SUBF_AutoVectorization 0.005 ms 0.005 ms 174941 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/test_result/deeplearning/dl-op-linalg-batch-matmul-benchmark.log b/test_result/deeplearning/dl-op-linalg-batch-matmul-benchmark.log index d187e4d9..2d4aa1cb 100644 --- a/test_result/deeplearning/dl-op-linalg-batch-matmul-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-batch-matmul-benchmark.log @@ -1,4 +1,4 @@ -2025-03-30T12:13:21+00:00 +2025-05-25T16:33:53+00:00 Running ./dl-op-linalg-batch-matmul-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,18 +6,18 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.36, 5.95 +Load Average: 1.00, 1.12, 3.51 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
--------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations --------------------------------------------------------------------------------------------- -DL_OPS_BATCH_MATMUL/Scalar/iterations:1 3525 ms 3517 ms 1 -DL_OPS_BATCH_MATMUL/AutoVectorization/iterations:1 976 ms 976 ms 1 -DL_OPS_BATCH_MATMUL/Vectorization/iterations:1 189 ms 189 ms 1 +DL_OPS_BATCH_MATMUL/Scalar/iterations:1 3525 ms 3525 ms 1 +DL_OPS_BATCH_MATMUL/AutoVectorization/iterations:1 974 ms 974 ms 1 +DL_OPS_BATCH_MATMUL/Vectorization/iterations:1 190 ms 190 ms 1 DL_OPS_BATCH_MATMUL/Tile/iterations:1 109 ms 109 ms 1 DL_OPS_BATCH_MATMUL/SCF/iterations:1 117 ms 117 ms 1 -DL_OPS_BATCH_MATMUL/BROADCAST/iterations:1 353 ms 353 ms 1 -DL_OPS_BATCH_MATMUL/BROADCAST_OMP/iterations:1 75.4 ms 38.2 ms 1 +DL_OPS_BATCH_MATMUL/BROADCAST/iterations:1 352 ms 352 ms 1 +DL_OPS_BATCH_MATMUL/BROADCAST_OMP/iterations:1 80.7 ms 53.0 ms 1 ---------- Verification ---------- Tile PASS SCF PASS diff --git a/test_result/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.log b/test_result/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.log index 4e58a246..d41a78ce 100644 --- a/test_result/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.log @@ -1,4 +1,4 @@ -2025-03-30T12:13:15+00:00 +2025-05-25T16:33:46+00:00 Running ./dl-op-linalg-conv2d-nchw-fchw-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.37, 5.97 +Load Average: 1.00, 1.13, 3.54 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
------------------------------------------------------------------- Benchmark Time CPU Iterations ------------------------------------------------------------------- -BM_Conv2DNchwFchw_SCALAR 283 ms 283 ms 2 -BM_Conv2DNchwFchw_Im2col 10.2 ms 10.2 ms 68 +BM_Conv2DNchwFchw_SCALAR 282 ms 282 ms 2 +BM_Conv2DNchwFchw_Im2col 8.35 ms 8.35 ms 86 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.log b/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.log index 08115149..9f8ee937 100644 --- a/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.log @@ -1,4 +1,4 @@ -2025-03-30T12:13:18+00:00 +2025-05-25T16:33:50+00:00 Running ./dl-op-linalg-conv2d-nhwc-fhwc-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.36, 5.95 +Load Average: 1.00, 1.13, 3.52 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
--------------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations --------------------------------------------------------------------------------------------------- -DL_OPS_CONV_2D_NHWC_FHWC/scalar/iterations:5 73.5 ms 73.5 ms 5 -DL_OPS_CONV_2D_NHWC_FHWC/auto_vectorization/iterations:5 9.35 ms 9.35 ms 5 +DL_OPS_CONV_2D_NHWC_FHWC/scalar/iterations:5 72.3 ms 72.3 ms 5 +DL_OPS_CONV_2D_NHWC_FHWC/auto_vectorization/iterations:5 9.34 ms 9.34 ms 5 DL_OPS_CONV_2D_NHWC_FHWC/vectorization/iterations:5 1.74 ms 1.74 ms 5 DL_OPS_CONV_2D_NHWC_FHWC/vec_tile/iterations:5 1.73 ms 1.73 ms 5 ---------- Verification ---------- diff --git a/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.log b/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.log index f09a4101..34a043ac 100644 --- a/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.log @@ -1,4 +1,4 @@ -2025-03-30T12:13:17+00:00 +2025-05-25T16:33:48+00:00 Running ./dl-op-linalg-conv2d-nhwc-hwcf-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.37, 5.97 +Load Average: 1.00, 1.13, 3.52 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
--------------------------------------------------------------------------------- Benchmark Time CPU Iterations --------------------------------------------------------------------------------- -BM_CONV_2D_NHWC_HWCF_SCALAR 32.4 ms 32.4 ms 22 -BM_CONV_2D_NHWC_HWCF_AutoVectorization 5.83 ms 5.83 ms 120 +BM_CONV_2D_NHWC_HWCF_SCALAR 32.3 ms 32.3 ms 22 +BM_CONV_2D_NHWC_HWCF_AutoVectorization 6.14 ms 6.14 ms 114 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/test_result/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.log b/test_result/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.log index c761a6b6..3573e854 100644 --- a/test_result/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.log @@ -1,4 +1,4 @@ -2025-03-30T12:13:19+00:00 +2025-05-25T16:33:50+00:00 Running ./dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,14 +6,14 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.36, 5.95 +Load Average: 1.00, 1.13, 3.52 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
------------------------------------------------------------------------------------------------------------ Benchmark Time CPU Iterations ------------------------------------------------------------------------------------------------------------ -DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/scalar/iterations:5 6.25 ms 6.25 ms 5 -DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/auto_vectorization/iterations:5 1.71 ms 1.71 ms 5 -DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/vectorization/iterations:5 0.128 ms 0.128 ms 5 +DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/scalar/iterations:5 6.55 ms 6.54 ms 5 +DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/auto_vectorization/iterations:5 1.68 ms 1.68 ms 5 +DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/vectorization/iterations:5 0.124 ms 0.124 ms 5 ---------- Verification ---------- auto_vectorization PASS vectorization PASS diff --git a/test_result/deeplearning/dl-op-linalg-mathexp-benchmark.log b/test_result/deeplearning/dl-op-linalg-mathexp-benchmark.log index c3ecd554..ed7837dc 100644 --- a/test_result/deeplearning/dl-op-linalg-mathexp-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-mathexp-benchmark.log @@ -1,4 +1,4 @@ -2025-03-30T12:13:45+00:00 +2025-05-25T16:34:17+00:00 Running ./dl-op-linalg-mathexp-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.33, 5.81 +Load Average: 1.00, 1.11, 3.46 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
------------------------------------------------------------------- Benchmark Time CPU Iterations ------------------------------------------------------------------- -BM_EXP_SCALAR 0.046 ms 0.046 ms 15309 -BM_EXP_AutoVectorization 0.032 ms 0.032 ms 21998 +BM_EXP_SCALAR 0.046 ms 0.046 ms 15245 +BM_EXP_AutoVectorization 0.031 ms 0.031 ms 22544 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/test_result/deeplearning/dl-op-linalg-mathfpow-benchmark.log b/test_result/deeplearning/dl-op-linalg-mathfpow-benchmark.log index 018b3377..58cbd7ee 100644 --- a/test_result/deeplearning/dl-op-linalg-mathfpow-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-mathfpow-benchmark.log @@ -1,4 +1,4 @@ -2025-03-30T12:13:42+00:00 +2025-05-25T16:34:13+00:00 Running ./dl-op-linalg-mathfpow-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.34, 5.84 +Load Average: 1.00, 1.11, 3.46 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
-------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------- -BM_FPOW_SCALAR 0.084 ms 0.084 ms 8347 -BM_FPOW_AutoVectorization 0.057 ms 0.057 ms 12328 +BM_FPOW_SCALAR 0.084 ms 0.084 ms 8153 +BM_FPOW_AutoVectorization 0.057 ms 0.057 ms 12317 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/test_result/deeplearning/dl-op-linalg-mathrsqrt-benchmark.log b/test_result/deeplearning/dl-op-linalg-mathrsqrt-benchmark.log index bf045f07..0e28e595 100644 --- a/test_result/deeplearning/dl-op-linalg-mathrsqrt-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-mathrsqrt-benchmark.log @@ -1,4 +1,4 @@ -2025-03-30T12:13:43+00:00 +2025-05-25T16:34:15+00:00 Running ./dl-op-linalg-mathrsqrt-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.33, 5.81 +Load Average: 1.00, 1.11, 3.46 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
--------------------------------------------------------------------- Benchmark Time CPU Iterations --------------------------------------------------------------------- -BM_RSQRT_SCALAR 0.073 ms 0.073 ms 9497 -BM_RSQRT_AutoVectorization 0.004 ms 0.004 ms 161025 +BM_RSQRT_SCALAR 0.073 ms 0.073 ms 9557 +BM_RSQRT_AutoVectorization 0.004 ms 0.004 ms 161107 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/test_result/deeplearning/dl-op-linalg-matmul-benchmark.log b/test_result/deeplearning/dl-op-linalg-matmul-benchmark.log index 412446c6..d020eeff 100644 --- a/test_result/deeplearning/dl-op-linalg-matmul-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-matmul-benchmark.log @@ -1,4 +1,4 @@ -2025-03-30T12:13:04+00:00 +2025-05-25T16:33:36+00:00 Running ./dl-op-linalg-matmul-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,16 +6,16 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.38, 6.03 +Load Average: 1.00, 1.13, 3.57 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
------------------------------------------------------------------------------- Benchmark Time CPU Iterations ------------------------------------------------------------------------------- -DL_OPS_MATMUL/scalar_O0/iterations:1 3716 ms 3716 ms 1 -DL_OPS_MATMUL/scalar_O3/iterations:1 3312 ms 3312 ms 1 -DL_OPS_MATMUL/tile/iterations:1 117 ms 117 ms 1 -DL_OPS_MATMUL/vec/iterations:1 140 ms 140 ms 1 -DL_OPS_MATMUL/vec_omp/iterations:1 20.5 ms 18.8 ms 1 +DL_OPS_MATMUL/scalar_O0/iterations:1 3394 ms 3394 ms 1 +DL_OPS_MATMUL/scalar_O3/iterations:1 2944 ms 2944 ms 1 +DL_OPS_MATMUL/tile/iterations:1 120 ms 120 ms 1 +DL_OPS_MATMUL/vec/iterations:1 139 ms 139 ms 1 +DL_OPS_MATMUL/vec_omp/iterations:1 67.8 ms 17.8 ms 1 ---------- Verification ---------- tile PASS vec PASS diff --git a/test_result/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.log b/test_result/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.log index f9296017..c13c5712 100644 --- a/test_result/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.log @@ -1,4 +1,4 @@ -2025-03-30T12:13:19+00:00 +2025-05-25T16:33:51+00:00 Running ./dl-op-linalg-pooling-nhwc-sum-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.36, 5.95 +Load Average: 1.00, 1.13, 3.52 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
-------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------- -BM_POOLING_NHWC_SUM_SCALAR 0.233 ms 0.233 ms 3007 -BM_POOLING_NHWC_SUM_AutoVectorization 0.042 ms 0.042 ms 16752 +BM_POOLING_NHWC_SUM_SCALAR 0.233 ms 0.233 ms 2997 +BM_POOLING_NHWC_SUM_AutoVectorization 0.042 ms 0.042 ms 16895 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/test_result/deeplearning/dl-op-linalg-reduceaddf-benchmark.log b/test_result/deeplearning/dl-op-linalg-reduceaddf-benchmark.log index 1e8bcc7f..5e44eb1f 100644 --- a/test_result/deeplearning/dl-op-linalg-reduceaddf-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-reduceaddf-benchmark.log @@ -1,4 +1,4 @@ -2025-03-30T12:13:47+00:00 +2025-05-25T16:34:19+00:00 Running ./dl-op-linalg-reduceaddf-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,5 +6,5 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.33, 5.81 +Load Average: 1.00, 1.11, 3.44 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
diff --git a/test_result/deeplearning/dl-op-linalg-reducemaxf-benchmark.log b/test_result/deeplearning/dl-op-linalg-reducemaxf-benchmark.log index 7ed900ff..e96bd0ad 100644 --- a/test_result/deeplearning/dl-op-linalg-reducemaxf-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-reducemaxf-benchmark.log @@ -1,4 +1,4 @@ -2025-03-30T12:13:48+00:00 +2025-05-25T16:34:19+00:00 Running ./dl-op-linalg-reducemaxf-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,5 +6,5 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.08, 1.34, 5.79 +Load Average: 1.00, 1.11, 3.44 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. diff --git a/test_result/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.log b/test_result/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.log index 37b85c1d..c503949b 100644 --- a/test_result/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.log @@ -1,4 +1,4 @@ -2025-03-30T12:13:48+00:00 +2025-05-25T16:34:19+00:00 Running ./dl-op-linalg-softmax-exp-sum-div-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.08, 1.34, 5.79 +Load Average: 1.00, 1.11, 3.44 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
-------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------- -BM_SOFTMAXEXPSUMDIV_SCALAR 0.006 ms 0.006 ms 124261 -BM_SOFTMAXEXPSUMDIV_AutoVectorization 0.004 ms 0.004 ms 182159 +BM_SOFTMAXEXPSUMDIV_SCALAR 0.006 ms 0.006 ms 123343 +BM_SOFTMAXEXPSUMDIV_AutoVectorization 0.004 ms 0.004 ms 181973 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/test_result/deeplearning/dl-op-matmul-transpose-b-benchmark.log b/test_result/deeplearning/dl-op-matmul-transpose-b-benchmark.log index ac6c4e30..05074bd2 100644 --- a/test_result/deeplearning/dl-op-matmul-transpose-b-benchmark.log +++ b/test_result/deeplearning/dl-op-matmul-transpose-b-benchmark.log @@ -1,4 +1,4 @@ -2025-03-30T12:13:50+00:00 +2025-05-25T16:34:22+00:00 Running ./dl-op-matmul-transpose-b-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,15 +6,15 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.08, 1.34, 5.79 +Load Average: 1.00, 1.11, 3.44 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
----------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations ----------------------------------------------------------------------------------------------- -DL_OPS_MATMUL_TRANSPOSE_B/scalar_O0/iterations:5 1046 ms 1044 ms 5 -DL_OPS_MATMUL_TRANSPOSE_B/scalar_O3/iterations:5 277 ms 277 ms 5 -DL_OPS_MATMUL_TRANSPOSE_B/scalar_O3_omp/iterations:5 32.4 ms 21.2 ms 5 -DL_OPS_MATMUL_TRANSPOSE_B/vec/iterations:5 84.6 ms 84.6 ms 5 +DL_OPS_MATMUL_TRANSPOSE_B/scalar_O0/iterations:5 1262 ms 1262 ms 5 +DL_OPS_MATMUL_TRANSPOSE_B/scalar_O3/iterations:5 311 ms 311 ms 5 +DL_OPS_MATMUL_TRANSPOSE_B/scalar_O3_omp/iterations:5 33.9 ms 22.0 ms 5 +DL_OPS_MATMUL_TRANSPOSE_B/vec/iterations:5 85.3 ms 85.3 ms 5 ---------- Verification ---------- scalar_O3 PASS scalar_O3_omp PASS diff --git a/test_result/deeplearning/dl-op-tosa-transpose-benchmark.log b/test_result/deeplearning/dl-op-tosa-transpose-benchmark.log index aec2390a..12c7ba28 100644 --- a/test_result/deeplearning/dl-op-tosa-transpose-benchmark.log +++ b/test_result/deeplearning/dl-op-tosa-transpose-benchmark.log @@ -1,4 +1,4 @@ -2025-03-30T12:13:50+00:00 +2025-05-25T16:34:22+00:00 Running ./dl-op-tosa-transpose-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,12 +6,12 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.08, 1.34, 5.79 +Load Average: 1.00, 1.11, 3.44 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
------------------------------------------------------------------------------------- Benchmark Time CPU Iterations ------------------------------------------------------------------------------------- -DL_OPS_TRANSPOSE_2D/scalar_O0/iterations:5 25.4 ms 20.6 ms 5 -DL_OPS_TRANSPOSE_2D/scalar_O3/iterations:5 19.2 ms 14.2 ms 5 +DL_OPS_TRANSPOSE_2D/scalar_O0/iterations:5 25.6 ms 19.9 ms 5 +DL_OPS_TRANSPOSE_2D/scalar_O3/iterations:5 19.1 ms 16.2 ms 5 ---------- Verification ---------- scalar_O3 PASS diff --git a/test_result/vectorization/vectorization_result.log b/test_result/vectorization/vectorization_result.log new file mode 100644 index 00000000..a5ec2b95 --- /dev/null +++ b/test_result/vectorization/vectorization_result.log @@ -0,0 +1,202 @@ +Vectorization Benchmark - Sun May 25 16:08:49 UTC 2025 +[Info] Running CMake configuration... +-- The CXX compiler identification is GNU 11.4.0 +-- The C compiler identification is GNU 11.4.0 +-- Detecting CXX compiler ABI info +-- Detecting CXX compiler ABI info - done +-- Check for working CXX compiler: /usr/bin/c++ - skipped +-- Detecting CXX compile features +-- Detecting CXX compile features - done +-- Detecting C compiler ABI info +-- Detecting C compiler ABI info - done +-- Check for working C compiler: /usr/bin/cc - skipped +-- Detecting C compile features +-- Detecting C compile features - done +-- Configuring Target Architecture: avx512f +-- Configuring Target Triple: x86_64-unknown-linux-gnu +-- Configuring benchmarks: google +-- Looking for pthread.h +-- Looking for pthread.h - found +-- Performing Test CMAKE_HAVE_LIBC_PTHREAD +-- Performing Test CMAKE_HAVE_LIBC_PTHREAD - Success +-- Found Threads: TRUE +-- Performing Test HAVE_SSE +-- Performing Test HAVE_SSE - Success +-- SSE support - yes +-- Performing Test HAVE_AVX2 +-- Performing Test HAVE_AVX2 - Success +-- AVX2 support - yes +-- Performing Test HAVE_AVX512 +-- Performing Test HAVE_AVX512 - Failed +-- AVX512 support - no +-- Performing Test HAVE_NEON +-- 
Performing Test HAVE_NEON - Failed +-- Arm Neon support - no +-- Configuring done +-- Generating done +-- Build files have been written to: /home/buddy-complier-workspace/buddy-benchmark/build +[Info] Building vectorization-matrix-benchmark... +[1/17] Generating mlir-matmul.o +[2/17] Generating mlir-matvec.o +[3/17] Linking CXX static library benchmarks/Vectorization/libMLIRMatMul.a +[4/17] Linking CXX static library benchmarks/Vectorization/libMLIRMatVec.a +[5/17] Creating directories for 'project_googlebenchmark' +[6/17] Performing download step (git clone) for 'project_googlebenchmark' +Cloning into 'project_googlebenchmark'... +HEAD is now at f91b6b4 bump version to 1.6 in preparation for release +[7/17] No update step for 'project_googlebenchmark' +[8/17] No patch step for 'project_googlebenchmark' +[9/17] Performing configure step for 'project_googlebenchmark' +-- The CXX compiler identification is GNU 11.4.0 +-- Detecting CXX compiler ABI info +-- Detecting CXX compiler ABI info - done +-- Check for working CXX compiler: /usr/bin/c++ - skipped +-- Detecting CXX compile features +-- Detecting CXX compile features - done +-- Failed to find LLVM FileCheck +-- Found Git: /usr/bin/git (found version "2.34.1") +-- git version: v1.6.0 normalized to 1.6.0 +-- Version: 1.6.0 +-- Performing Test HAVE_CXX_FLAG_STD_CXX11 +-- Performing Test HAVE_CXX_FLAG_STD_CXX11 - Success +-- Performing Test HAVE_CXX_FLAG_WALL +-- Performing Test HAVE_CXX_FLAG_WALL - Success +-- Performing Test HAVE_CXX_FLAG_WEXTRA +-- Performing Test HAVE_CXX_FLAG_WEXTRA - Success +-- Performing Test HAVE_CXX_FLAG_WSHADOW +-- Performing Test HAVE_CXX_FLAG_WSHADOW - Success +-- Performing Test HAVE_CXX_FLAG_WERROR +-- Performing Test HAVE_CXX_FLAG_WERROR - Success +-- Performing Test HAVE_CXX_FLAG_WSUGGEST_OVERRIDE +-- Performing Test HAVE_CXX_FLAG_WSUGGEST_OVERRIDE - Success +-- Performing Test HAVE_CXX_FLAG_PEDANTIC +-- Performing Test HAVE_CXX_FLAG_PEDANTIC - Success +-- Performing Test 
HAVE_CXX_FLAG_PEDANTIC_ERRORS +-- Performing Test HAVE_CXX_FLAG_PEDANTIC_ERRORS - Success +-- Performing Test HAVE_CXX_FLAG_WSHORTEN_64_TO_32 +-- Performing Test HAVE_CXX_FLAG_WSHORTEN_64_TO_32 - Failed +-- Performing Test HAVE_CXX_FLAG_FSTRICT_ALIASING +-- Performing Test HAVE_CXX_FLAG_FSTRICT_ALIASING - Success +-- Performing Test HAVE_CXX_FLAG_WNO_DEPRECATED_DECLARATIONS +-- Performing Test HAVE_CXX_FLAG_WNO_DEPRECATED_DECLARATIONS - Success +-- Performing Test HAVE_CXX_FLAG_WNO_DEPRECATED +-- Performing Test HAVE_CXX_FLAG_WNO_DEPRECATED - Success +-- Performing Test HAVE_CXX_FLAG_WSTRICT_ALIASING +-- Performing Test HAVE_CXX_FLAG_WSTRICT_ALIASING - Success +-- Performing Test HAVE_CXX_FLAG_WD654 +-- Performing Test HAVE_CXX_FLAG_WD654 - Failed +-- Performing Test HAVE_CXX_FLAG_WTHREAD_SAFETY +-- Performing Test HAVE_CXX_FLAG_WTHREAD_SAFETY - Failed +-- Performing Test HAVE_CXX_FLAG_COVERAGE +-- Performing Test HAVE_CXX_FLAG_COVERAGE - Success +-- Performing Test HAVE_STD_REGEX +CMake Warning at cmake/CXXFeatureCheck.cmake:43 (message): + If you see build failures due to cross compilation, try setting + HAVE_STD_REGEX to 0 +Call Stack (most recent call first): + CMakeLists.txt:279 (cxx_feature_check) + + +-- Performing Test HAVE_STD_REGEX -- success +-- Performing Test HAVE_GNU_POSIX_REGEX +-- Performing Test HAVE_GNU_POSIX_REGEX -- failed to compile +-- Performing Test HAVE_POSIX_REGEX +CMake Warning at cmake/CXXFeatureCheck.cmake:43 (message): + If you see build failures due to cross compilation, try setting + HAVE_POSIX_REGEX to 0 +Call Stack (most recent call first): + CMakeLists.txt:281 (cxx_feature_check) + + +-- Performing Test HAVE_POSIX_REGEX -- success +-- Performing Test HAVE_STEADY_CLOCK +CMake Warning at cmake/CXXFeatureCheck.cmake:43 (message): + If you see build failures due to cross compilation, try setting + HAVE_STEADY_CLOCK to 0 +Call Stack (most recent call first): + CMakeLists.txt:290 (cxx_feature_check) + + +-- Performing Test 
HAVE_STEADY_CLOCK -- success +-- Looking for C++ include pthread.h +-- Looking for C++ include pthread.h - found +-- Performing Test CMAKE_HAVE_LIBC_PTHREAD +-- Performing Test CMAKE_HAVE_LIBC_PTHREAD - Success +-- Found Threads: TRUE +-- Configuring done +-- Generating done +-- Build files have been written to: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/src/project_googlebenchmark-build +[10/17] Performing build step for 'project_googlebenchmark' +[1/22] Building CXX object src/CMakeFiles/benchmark.dir/colorprint.cc.o +[2/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark_name.cc.o +[3/22] Building CXX object src/CMakeFiles/benchmark_main.dir/benchmark_main.cc.o +[4/22] Building CXX object src/CMakeFiles/benchmark.dir/sleep.cc.o +[5/22] Building CXX object src/CMakeFiles/benchmark.dir/perf_counters.cc.o +[6/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark_api_internal.cc.o +[7/22] Building CXX object src/CMakeFiles/benchmark.dir/counter.cc.o +[8/22] Building CXX object src/CMakeFiles/benchmark.dir/console_reporter.cc.o +[9/22] Building CXX object src/CMakeFiles/benchmark.dir/timers.cc.o +[10/22] Building CXX object src/CMakeFiles/benchmark.dir/reporter.cc.o +[11/22] Building CXX object src/CMakeFiles/benchmark.dir/commandlineflags.cc.o +[12/22] Building CXX object src/CMakeFiles/benchmark.dir/complexity.cc.o +[13/22] Building CXX object src/CMakeFiles/benchmark.dir/csv_reporter.cc.o +[14/22] Building CXX object src/CMakeFiles/benchmark.dir/string_util.cc.o +[15/22] Building CXX object src/CMakeFiles/benchmark.dir/json_reporter.cc.o +[16/22] Building CXX object src/CMakeFiles/benchmark.dir/sysinfo.cc.o +[17/22] Building CXX object src/CMakeFiles/benchmark.dir/statistics.cc.o +[18/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark_runner.cc.o +[19/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark.cc.o +[20/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark_register.cc.o 
+[21/22] Linking CXX static library src/libbenchmark.a +[22/22] Linking CXX static library src/libbenchmark_main.a +[11/17] Performing install step for 'project_googlebenchmark' +[0/1] Install the project... +-- Install configuration: "Release" +-- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/lib/libbenchmark.a +-- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/lib/libbenchmark_main.a +-- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/include/benchmark +-- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/include/benchmark/benchmark.h +-- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/lib/cmake/benchmark/benchmarkConfig.cmake +-- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/lib/cmake/benchmark/benchmarkConfigVersion.cmake +-- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/lib/pkgconfig/benchmark.pc +-- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/lib/cmake/benchmark/benchmarkTargets.cmake +-- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/lib/cmake/benchmark/benchmarkTargets-release.cmake +-- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/share/doc/benchmark +-- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/share/doc/benchmark/AssemblyTests.md +-- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/share/doc/benchmark/_config.yml +-- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/share/doc/benchmark/dependencies.md +-- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/share/doc/benchmark/index.md +-- Installing: 
/home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/share/doc/benchmark/perf_counters.md +-- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/share/doc/benchmark/platform_specific_build_instructions.md +-- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/share/doc/benchmark/random_interleaving.md +-- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/share/doc/benchmark/releasing.md +-- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/share/doc/benchmark/tools.md +-- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/share/doc/benchmark/user_guide.md +[12/17] No test step for 'project_googlebenchmark' +[13/17] Completed 'project_googlebenchmark' +[14/17] Building CXX object benchmarks/Vectorization/CMakeFiles/vectorization-matrix-benchmark.dir/Main.cpp.o +[15/17] Building CXX object benchmarks/Vectorization/CMakeFiles/vectorization-matrix-benchmark.dir/MLIRMatMulBenchmark.cpp.o +[16/17] Building CXX object benchmarks/Vectorization/CMakeFiles/vectorization-matrix-benchmark.dir/MLIRMatVecBenchmark.cpp.o +[17/17] Linking CXX executable bin/vectorization-matrix-benchmark +[Info] Running vectorization-matrix-benchmark... +2025-05-25T16:08:56+00:00 +Running ./vectorization-matrix-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 0.25, 6.84, 12.60 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
+-------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------- +MLIR_MatMul/1 19.5 ns 19.5 ns 36611256 +MLIR_MatVec/1 20.7 ns 20.7 ns 34849004 +-------------------------------------------------------- +MLIR_MatMul: MLIR MatMul Operation + Nested Loop +[ 18 18 18 18 18 18 18 18 18 18 ] +-------------------------------------------------------- +MLIR_MatVec: MLIR MatVec Operation +[ 18 18 18 18 18 18 18 18 18 18 ] From f5ba9cd9155fea877e4df05549d5db3b8efa8797 Mon Sep 17 00:00:00 2001 From: Q Liu <52538137+LIUQyou@users.noreply.github.com> Date: Sun, 25 May 2025 19:51:01 +0200 Subject: [PATCH 03/52] test bench --- scripts/run_docker.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/run_docker.sh b/scripts/run_docker.sh index f7962c77..a48e21c3 100644 --- a/scripts/run_docker.sh +++ b/scripts/run_docker.sh @@ -23,3 +23,4 @@ docker exec "${CID}" bash -lc ' # ➌ bring the logs back to the host (under ./test_result) docker cp "${CID}":/home/buddy-complier-workspace/buddy-benchmark/test_result ./test_result + From 6981c8d9dc6b5677a4f99f3a49a41032e3f86798 Mon Sep 17 00:00:00 2001 From: Q Liu <52538137+LIUQyou@users.noreply.github.com> Date: Sun, 25 May 2025 20:16:50 +0200 Subject: [PATCH 04/52] test --- .github/workflow/bench.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflow/bench.yml b/.github/workflow/bench.yml index 5c5f9d5b..29e2b5ee 100644 --- a/.github/workflow/bench.yml +++ b/.github/workflow/bench.yml @@ -2,7 +2,7 @@ name: Buddy-Benchmark CI on: push: - branches: [main] # or whatever branch should trigger the run + branches: ['**'] # or whatever branch should trigger the run pull_request: jobs: From 3746754c4a48b8a04eca61d65c7cfcd0e2c23c26 Mon Sep 17 00:00:00 2001 From: Q Liu <52538137+LIUQyou@users.noreply.github.com> Date: Sun, 25 May 2025 20:21:45 +0200 Subject: [PATCH 05/52] test --- .clang-format | 0 .gitattributes | 0 
.github/ISSUE_TEMPLATE/bug_report.md | 0 .github/ISSUE_TEMPLATE/feature_request.md | 0 .github/{workflow => workflows}/bench.yml | 0 .gitignore | 0 .gitmodules | 0 .style.yapf | 0 CMakeLists.txt | 0 LICENSE | 0 README.md | 0 benchmarks/AudioProcessing/Audios/NASA_Mars.wav | Bin benchmarks/AudioProcessing/CMakeLists.txt | 0 .../Operations/BiquadOp/CMakeLists.txt | 0 .../Operations/BiquadOp/MLIRBiquad.mlir | 0 .../AudioProcessing/Operations/BiquadOp/Main.cpp | 0 .../AudioProcessing/Operations/BiquadOp/Utils.hpp | 0 .../AudioProcessing/Operations/CMakeLists.txt | 0 .../AudioProcessing/Operations/FFTOp/CMakeLists.txt | 0 .../AudioProcessing/Operations/FFTOp/KFRFft.cpp | 0 .../AudioProcessing/Operations/FFTOp/Main.cpp | 0 .../AudioProcessing/Operations/FIROp/CMakeLists.txt | 0 .../AudioProcessing/Operations/FIROp/MLIRFIR.mlir | 0 .../Operations/FIROp/MLIRFIRTiledVectorization.mlir | 0 .../Operations/FIROp/MLIRFIRVectorization.mlir | 0 .../AudioProcessing/Operations/FIROp/Main.cpp | 0 .../AudioProcessing/Operations/FIROp/Utils.hpp | 0 .../AudioProcessing/Operations/IIROp/CMakeLists.txt | 0 .../Operations/IIROp/MLIRIIRScalar.mlir | 0 .../Operations/IIROp/MLIRIIRVectorization.mlir | 0 .../AudioProcessing/Operations/IIROp/Main.cpp | 0 .../AudioProcessing/Operations/IIROp/Utils.hpp | 0 .../Operations/RFFTOp/CMakeLists.txt | 0 .../Operations/RFFTOp/GoogleBenchmarkMain.cpp | 0 .../AudioProcessing/Operations/RFFTOp/RFFT.py | 0 benchmarks/AudioProcessing/README.md | 0 benchmarks/CMakeLists.txt | 0 benchmarks/DeepLearning/CMakeLists.txt | 0 benchmarks/DeepLearning/Layers/CMakeLists.txt | 0 benchmarks/DeepLearning/Layers/FFN/.gitignore | 0 benchmarks/DeepLearning/Layers/FFN/CMakeLists.txt | 0 .../DeepLearning/Layers/FFN/GoogleBenchmarkMain.cpp | 0 .../DeepLearning/Layers/FFN/buddy_ffn_import.py | 0 benchmarks/DeepLearning/Layers/RMSNorm/.gitignore | 0 .../DeepLearning/Layers/RMSNorm/CMakeLists.txt | 0 .../Layers/RMSNorm/GoogleBenchmarkMain.cpp | 0 
.../Layers/RMSNorm/buddy_rmsnorm_import.py | 0 .../DeepLearning/Layers/SelfAttention/.gitignore | 0 .../Layers/SelfAttention/CMakeLists.txt | 0 .../Layers/SelfAttention/GoogleBenchmarkMain.cpp | 0 .../SelfAttention/buddy_selfattention_import.py | 0 benchmarks/DeepLearning/Models/Bert/.gitignore | 0 benchmarks/DeepLearning/Models/Bert/CMakeLists.txt | 0 .../Models/Bert/GoogleBenchmarkMain.cpp | 0 .../DeepLearning/Models/Bert/buddy_bert_import.py | 0 benchmarks/DeepLearning/Models/CMakeLists.txt | 0 benchmarks/DeepLearning/Models/LeNet/.gitignore | 0 benchmarks/DeepLearning/Models/LeNet/CMakeLists.txt | 0 .../Models/LeNet/GoogleBenchmarkMain.cpp | 0 .../DeepLearning/Models/LeNet/buddy_lenet_import.py | 0 .../DeepLearning/Models/LeNet/lenet_model.pth | Bin benchmarks/DeepLearning/Models/LeNet/model.py | 0 .../DeepLearning/Models/MobileNet-V3/.gitignore | 0 .../DeepLearning/Models/MobileNet-V3/CMakeLists.txt | 0 .../Models/MobileNet-V3/GoogleBenchmarkMain.cpp | 0 .../Models/MobileNet-V3/buddy_mobilenetv3_import.py | 0 benchmarks/DeepLearning/Models/Resnet18/.gitignore | 0 .../DeepLearning/Models/Resnet18/CMakeLists.txt | 0 .../Models/Resnet18/GoogleBenchmarkMain.cpp | 0 .../Models/Resnet18/buddy_resnet18_import.py | 0 benchmarks/DeepLearning/Models/TinyLlama/.gitignore | 0 .../DeepLearning/Models/TinyLlama/CMakeLists.txt | 0 benchmarks/DeepLearning/Models/TinyLlama/Main.cpp | 0 benchmarks/DeepLearning/Models/TinyLlama/Utils.hpp | 0 .../Models/TinyLlama/buddy_tinyllama_import.py | 0 benchmarks/DeepLearning/Models/Whisper/.gitignore | 0 .../DeepLearning/Models/Whisper/CMakeLists.txt | 0 .../Models/Whisper/GoogleBenchmarkMain.cpp | 0 .../Models/Whisper/buddy_whisper_import.py | 0 .../DeepLearning/Ops/ArithAddfOp/ArithAddf.mlir | 0 .../DeepLearning/Ops/ArithAddfOp/CMakeLists.txt | 0 .../Ops/ArithAddfOp/GoogleBenchmarkMain.cpp | 0 .../DeepLearning/Ops/ArithDivfOp/ArithDivf.mlir | 0 .../DeepLearning/Ops/ArithDivfOp/CMakeLists.txt | 0 
.../Ops/ArithDivfOp/GoogleBenchmarkMain.cpp | 0 .../DeepLearning/Ops/ArithMulfOp/ArithMulf.mlir | 0 .../DeepLearning/Ops/ArithMulfOp/CMakeLists.txt | 0 .../Ops/ArithMulfOp/GoogleBenchmarkMain.cpp | 0 .../DeepLearning/Ops/ArithNegfOp/ArithNegf.mlir | 0 .../DeepLearning/Ops/ArithNegfOp/CMakeLists.txt | 0 .../Ops/ArithNegfOp/GoogleBenchmarkMain.cpp | 0 .../DeepLearning/Ops/ArithSubfOp/ArithSubf.mlir | 0 .../DeepLearning/Ops/ArithSubfOp/CMakeLists.txt | 0 .../Ops/ArithSubfOp/GoogleBenchmarkMain.cpp | 0 .../DeepLearning/Ops/BatchMatMulOp/BatchMatMul.mlir | 0 .../Ops/BatchMatMulOp/BatchMatMulBroadcast.mlir | 0 .../Ops/BatchMatMulOp/BatchMatMulSCF.mlir | 0 .../Ops/BatchMatMulOp/BatchMatMulVec.mlir | 0 .../Ops/BatchMatMulOp/BatchMatMulVecTile.mlir | 0 .../DeepLearning/Ops/BatchMatMulOp/CMakeLists.txt | 0 benchmarks/DeepLearning/Ops/BatchMatMulOp/Main.cpp | 0 benchmarks/DeepLearning/Ops/BatchMatMulOp/Utils.hpp | 0 benchmarks/DeepLearning/Ops/CMakeLists.txt | 0 .../Ops/Conv2DNchwFchwOp/CMakeLists.txt | 0 .../Ops/Conv2DNchwFchwOp/Conv2DNchwFchw.mlir | 0 .../Ops/Conv2DNchwFchwOp/GoogleBenchmarkMain.cpp | 0 .../Conv2DNchwFchwOp/conv2d-nchw-fchw-im2col.mlir | 0 .../Ops/Conv2DNhwcFhwcInt32Op/CMakeLists.txt | 0 .../Ops/Conv2DNhwcFhwcInt32Op/Conv2DNhwcFhwc.mlir | 0 .../Conv2DNhwcFhwcInt32Op/Conv2DNhwcFhwcVec.mlir | 0 .../Conv2DNhwcFhwcInt32Op/Conv2DNhwcFhwcVecRVV.mlir | 0 .../DeepLearning/Ops/Conv2DNhwcFhwcInt32Op/Main.cpp | 0 .../Ops/Conv2DNhwcFhwcInt32Op/Utils.hpp | 0 .../Ops/Conv2DNhwcFhwcOp/CMakeLists.txt | 0 .../Ops/Conv2DNhwcFhwcOp/Conv2DNhwcFhwc.mlir | 0 .../Ops/Conv2DNhwcFhwcOp/Conv2DNhwcFhwcVec.mlir | 0 .../Ops/Conv2DNhwcFhwcOp/Conv2DNhwcFhwcVecTile.mlir | 0 .../DeepLearning/Ops/Conv2DNhwcFhwcOp/Main.cpp | 0 .../DeepLearning/Ops/Conv2DNhwcFhwcOp/Utils.hpp | 0 .../Ops/Conv2DNhwcHwcfOp/CMakeLists.txt | 0 .../Ops/Conv2DNhwcHwcfOp/Conv2DNhwcHwcf.mlir | 0 .../Ops/Conv2DNhwcHwcfOp/GoogleBenchmarkMain.cpp | 0 .../Ops/DepthwiseConv2DNhwcHwcOp/CMakeLists.txt | 0 
.../DepthwiseConv2DNhwcHwc.mlir | 0 .../DepthwiseConv2DNhwcHwcVec.mlir | 0 .../Ops/DepthwiseConv2DNhwcHwcOp/Main.cpp | 0 .../Ops/DepthwiseConv2DNhwcHwcOp/Utils.hpp | 0 benchmarks/DeepLearning/Ops/MatMulOp/CMakeLists.txt | 0 benchmarks/DeepLearning/Ops/MatMulOp/Main.cpp | 0 benchmarks/DeepLearning/Ops/MatMulOp/Utils.hpp | 0 benchmarks/DeepLearning/Ops/MatMulOp/matmul.mlir | 0 .../Ops/MatMulTransposeBOp/CMakeLists.txt | 0 .../DeepLearning/Ops/MatMulTransposeBOp/Main.cpp | 0 .../Ops/MatMulTransposeBOp/MatMulTransposeB.mlir | 0 .../DeepLearning/Ops/MatMulTransposeBOp/Utils.hpp | 0 .../DeepLearning/Ops/MathExpOp/CMakeLists.txt | 0 .../Ops/MathExpOp/GoogleBenchmarkMain.cpp | 0 benchmarks/DeepLearning/Ops/MathExpOp/MathExp.mlir | 0 .../DeepLearning/Ops/MathFpowOp/CMakeLists.txt | 0 .../Ops/MathFpowOp/GoogleBenchmarkMain.cpp | 0 .../DeepLearning/Ops/MathFpowOp/MathFpow.mlir | 0 .../DeepLearning/Ops/MathRsqrtOp/CMakeLists.txt | 0 .../Ops/MathRsqrtOp/GoogleBenchmarkMain.cpp | 0 .../DeepLearning/Ops/MathRsqrtOp/MathRsqrt.mlir | 0 .../Ops/PoolingNhwcSumOp/CMakeLists.txt | 0 .../Ops/PoolingNhwcSumOp/GoogleBenchmarkMain.cpp | 0 .../Ops/PoolingNhwcSumOp/PoolingNhwcSum.mlir | 0 .../DeepLearning/Ops/ReduceAddfOp/CMakeLists.txt | 0 .../Ops/ReduceAddfOp/GoogleBenchmarkMain.cpp | 0 .../DeepLearning/Ops/ReduceAddfOp/ReduceAddf.mlir | 0 .../DeepLearning/Ops/ReduceMaxfOp/CMakeLists.txt | 0 .../Ops/ReduceMaxfOp/GoogleBenchmarkMain.cpp | 0 .../DeepLearning/Ops/ReduceMaxfOp/ReduceMaxf.mlir | 0 .../Ops/SoftmaxExpSumDivOp/CMakeLists.txt | 0 .../Ops/SoftmaxExpSumDivOp/GoogleBenchmarkMain.cpp | 0 .../Ops/SoftmaxExpSumDivOp/SoftmaxExpSumDiv.mlir | 0 .../DeepLearning/Ops/TransposeOp/CMakeLists.txt | 0 benchmarks/DeepLearning/Ops/TransposeOp/Main.cpp | 0 .../DeepLearning/Ops/TransposeOp/Transpose2D.mlir | 0 benchmarks/DeepLearning/Ops/TransposeOp/Utils.hpp | 0 benchmarks/DeepLearning/README.md | 0 benchmarks/Gemmini/CMakeLists.txt | 0 benchmarks/Gemmini/Ops/CMakeLists.txt | 0 
benchmarks/Gemmini/Ops/MatMulOp/CMakeLists.txt | 0 benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c | 0 benchmarks/Gemmini/Ops/MatMulOp/ExoUtils.h | 0 benchmarks/Gemmini/Ops/MatMulOp/Main.cpp | 0 benchmarks/Gemmini/Ops/MatMulOp/matmul.mlir | 0 benchmarks/Gemmini/README.md | 0 benchmarks/Gemmini/ResNet-101/.gitattributes | 0 benchmarks/Gemmini/ResNet-101/CMakeLists.txt | 0 benchmarks/Gemmini/ResNet-101/CRunnerUtils.cpp | 0 benchmarks/Gemmini/ResNet-101/Main.cpp | 0 benchmarks/Gemmini/ResNet-101/ResNet101.mlir | 0 benchmarks/Gemmini/ResNet-101/images/Cat.h | 0 benchmarks/Gemmini/ResNet-101/images/Cat.jpg | Bin benchmarks/Gemmini/ResNet-101/include/Labels.h | 0 benchmarks/Gemmini/Utils.h | 0 benchmarks/ImageProcessing/BuddyConv2DBenchmark.cpp | 0 benchmarks/ImageProcessing/BuddyCorr2DBenchmark.cpp | 0 .../ImageProcessing/BuddyMorph2DBenchmark.cpp | 0 .../ImageProcessing/BuddyResize2DBenchmark.cpp | 0 benchmarks/ImageProcessing/CMakeLists.txt | 0 .../ImageProcessing/EigenConvolve2DBenchmark.cpp | 0 benchmarks/ImageProcessing/Images/YuTu.png | Bin benchmarks/ImageProcessing/Images/YuTu1022.png | Bin benchmarks/ImageProcessing/Images/YuTu1024.png | Bin benchmarks/ImageProcessing/Images/YuTu128.png | Bin benchmarks/ImageProcessing/Images/YuTu16.png | Bin benchmarks/ImageProcessing/Images/YuTu18.png | Bin benchmarks/ImageProcessing/Images/YuTu256.png | Bin benchmarks/ImageProcessing/Images/YuTu32.png | Bin benchmarks/ImageProcessing/Images/YuTu4.png | Bin benchmarks/ImageProcessing/Images/YuTu512.png | Bin benchmarks/ImageProcessing/Images/YuTu6.png | Bin benchmarks/ImageProcessing/Images/YuTu64.png | Bin benchmarks/ImageProcessing/Images/YuTu8.png | Bin benchmarks/ImageProcessing/MLIRConv2D.mlir | 0 benchmarks/ImageProcessing/MLIRConv2DBenchmark.cpp | 0 benchmarks/ImageProcessing/Main.cpp | 0 .../ImageProcessing/OpenCVFilter2DBenchmark.cpp | 0 .../ImageProcessing/OpenCVMorph2DBenchmark.cpp | 0 .../ImageProcessing/OpenCVResize2DBenchmark.cpp | 0 
benchmarks/ImageProcessing/include/Kernels.h | 0 benchmarks/OpOptimization/CMakeLists.txt | 0 .../OpOptimization/Conv2dNchwFchw/CMakeLists.txt | 0 .../Conv2dNchwFchw/Conv2DNchwFchw.mlir | 0 .../Conv2dNchwFchw/Conv2DNchwFchwBenchmark.cpp | 0 .../Conv2dNchwFchw/Conv2DNchwFchwBroadcast.mlir | 0 .../Conv2dNchwFchw/Conv2DNchwFchwIm2col.mlir | 0 .../Conv2dNchwFchw/Conv2DNchwFchwWinagrad.mlir | 0 benchmarks/OpOptimization/Conv2dNchwFchw/Main.cpp | 0 benchmarks/OpOptimization/MatMul/CMakeLists.txt | 0 benchmarks/OpOptimization/MatMul/Main.cpp | 0 benchmarks/OpOptimization/MatMul/MatMul.mlir | 0 .../OpOptimization/MatMul/MatMulBenchmark.cpp | 0 .../OpOptimization/MatMul/MatMulBroadcast.mlir | 0 .../OpOptimization/MatMul/MatMulTransform.mlir | 0 benchmarks/OpOptimization/MatMul/TVM/.gitignore | 0 benchmarks/OpOptimization/MatMul/TVM/main.py | 0 .../OpOptimization/MatMul/TVM/matmul_autotvm.py | 0 .../OpOptimization/MatMul/TVM/matmul_manual.py | 0 benchmarks/Vectorization/CMakeLists.txt | 0 benchmarks/Vectorization/MLIRMatMul.mlir | 0 benchmarks/Vectorization/MLIRMatMulBenchmark.cpp | 0 benchmarks/Vectorization/MLIRMatVec.mlir | 0 benchmarks/Vectorization/MLIRMatVecBenchmark.cpp | 0 benchmarks/Vectorization/Main.cpp | 0 benchmarks/Vectorization/gccloops/CMakeLists.txt | 0 .../Vectorization/gccloops/MLIRGccLoopsEx1.mlir | 0 .../Vectorization/gccloops/MLIRGccLoopsEx10a.mlir | 0 .../gccloops/MLIRGccLoopsEx10aBenchmark.cpp | 0 .../Vectorization/gccloops/MLIRGccLoopsEx10b.mlir | 0 .../gccloops/MLIRGccLoopsEx10bBenchmark.cpp | 0 .../Vectorization/gccloops/MLIRGccLoopsEx11.mlir | 0 .../gccloops/MLIRGccLoopsEx11Benchmark.cpp | 0 .../Vectorization/gccloops/MLIRGccLoopsEx12.mlir | 0 .../gccloops/MLIRGccLoopsEx12Benchmark.cpp | 0 .../Vectorization/gccloops/MLIRGccLoopsEx13.mlir | 0 .../gccloops/MLIRGccLoopsEx13Benchmark.cpp | 0 .../Vectorization/gccloops/MLIRGccLoopsEx14.mlir | 0 .../gccloops/MLIRGccLoopsEx14Benchmark.cpp | 0 .../gccloops/MLIRGccLoopsEx1Benchmark.cpp | 0 
.../Vectorization/gccloops/MLIRGccLoopsEx21.mlir | 0 .../gccloops/MLIRGccLoopsEx21Benchmark.cpp | 0 .../Vectorization/gccloops/MLIRGccLoopsEx23.mlir | 0 .../gccloops/MLIRGccLoopsEx23Benchmark.cpp | 0 .../Vectorization/gccloops/MLIRGccLoopsEx24.mlir | 0 .../gccloops/MLIRGccLoopsEx24Benchmark.cpp | 0 .../Vectorization/gccloops/MLIRGccLoopsEx25.mlir | 0 .../gccloops/MLIRGccLoopsEx25Benchmark.cpp | 0 .../Vectorization/gccloops/MLIRGccLoopsEx2a.mlir | 0 .../gccloops/MLIRGccLoopsEx2aBenchmark.cpp | 0 .../Vectorization/gccloops/MLIRGccLoopsEx2b.mlir | 0 .../gccloops/MLIRGccLoopsEx2bBenchmark.cpp | 0 .../Vectorization/gccloops/MLIRGccLoopsEx3.mlir | 0 .../gccloops/MLIRGccLoopsEx3Benchmark.cpp | 0 .../Vectorization/gccloops/MLIRGccLoopsEx4a.mlir | 0 .../gccloops/MLIRGccLoopsEx4aBenchmark.cpp | 0 .../Vectorization/gccloops/MLIRGccLoopsEx4b.mlir | 0 .../gccloops/MLIRGccLoopsEx4bBenchmark.cpp | 0 .../Vectorization/gccloops/MLIRGccLoopsEx4c.mlir | 0 .../gccloops/MLIRGccLoopsEx4cBenchmark.cpp | 0 .../Vectorization/gccloops/MLIRGccLoopsEx7.mlir | 0 .../gccloops/MLIRGccLoopsEx7Benchmark.cpp | 0 .../Vectorization/gccloops/MLIRGccLoopsEx8.mlir | 0 .../gccloops/MLIRGccLoopsEx8Benchmark.cpp | 0 .../Vectorization/gccloops/MLIRGccLoopsEx9.mlir | 0 .../gccloops/MLIRGccLoopsEx9Benchmark.cpp | 0 benchmarks/Vectorization/gccloops/Main.cpp | 0 benchmarks/Vectorization/linpackc/CMakeLists.txt | 0 .../linpackc/MLIRLinpackCDaxpyBenchmark.cpp | 0 .../linpackc/MLIRLinpackCDaxpyRollF32.mlir | 0 .../linpackc/MLIRLinpackCDaxpyRollF64.mlir | 0 .../linpackc/MLIRLinpackCDaxpyUnrollF32.mlir | 0 .../linpackc/MLIRLinpackCDaxpyUnrollF64.mlir | 0 benchmarks/Vectorization/linpackc/Main.cpp | 0 benchmarks/Vectorization/polybench/CMakeLists.txt | 0 .../Vectorization/polybench/MLIRPolybench2mm.mlir | 0 .../polybench/MLIRPolybench2mmBenchmark.cpp | 0 .../Vectorization/polybench/MLIRPolybench3mm.mlir | 0 .../polybench/MLIRPolybench3mmBenchmark.cpp | 0 .../Vectorization/polybench/MLIRPolybenchAdi.mlir | 0 
.../polybench/MLIRPolybenchAdiBenchmark.cpp | 0 .../Vectorization/polybench/MLIRPolybenchAtax.mlir | 0 .../polybench/MLIRPolybenchAtaxBenchmark.cpp | 0 .../Vectorization/polybench/MLIRPolybenchBicg.mlir | 0 .../polybench/MLIRPolybenchBicgBenchmark.cpp | 0 .../polybench/MLIRPolybenchCholesky.mlir | 0 .../polybench/MLIRPolybenchCholeskyBenchmark.cpp | 0 .../polybench/MLIRPolybenchCorrelation.mlir | 0 .../polybench/MLIRPolybenchCorrelationBenchmark.cpp | 0 .../polybench/MLIRPolybenchCovariance.mlir | 0 .../polybench/MLIRPolybenchCovarianceBenchmark.cpp | 0 .../polybench/MLIRPolybenchDeriche.mlir | 0 .../polybench/MLIRPolybenchDericheBenchmark.cpp | 0 .../polybench/MLIRPolybenchDoitgen.mlir | 0 .../polybench/MLIRPolybenchDoitgenBenchmark.cpp | 0 .../polybench/MLIRPolybenchDurbin.mlir | 0 .../polybench/MLIRPolybenchDurbinBenchmark.cpp | 0 .../polybench/MLIRPolybenchFdtd2D.mlir | 0 .../polybench/MLIRPolybenchFdtd2DBenchmark.cpp | 0 .../polybench/MLIRPolybenchFloydWarshall.mlir | 0 .../MLIRPolybenchFloydWarshallBenchmark.cpp | 0 .../Vectorization/polybench/MLIRPolybenchGemm.mlir | 0 .../polybench/MLIRPolybenchGemmBenchmark.cpp | 0 .../polybench/MLIRPolybenchGemver.mlir | 0 .../polybench/MLIRPolybenchGemverBenchmark.cpp | 0 .../polybench/MLIRPolybenchGesummv.mlir | 0 .../polybench/MLIRPolybenchGesummvBenchmark.cpp | 0 .../polybench/MLIRPolybenchGramschmidt.mlir | 0 .../polybench/MLIRPolybenchGramschmidtBenchmark.cpp | 0 .../polybench/MLIRPolybenchHeat3D.mlir | 0 .../polybench/MLIRPolybenchHeat3DBenchmark.cpp | 0 .../polybench/MLIRPolybenchJacobi1D.mlir | 0 .../polybench/MLIRPolybenchJacobi1DBenchmark.cpp | 0 .../polybench/MLIRPolybenchJacobi2D.mlir | 0 .../polybench/MLIRPolybenchJacobi2DBenchmark.cpp | 0 .../Vectorization/polybench/MLIRPolybenchLu.mlir | 0 .../polybench/MLIRPolybenchLuBenchmark.cpp | 0 .../polybench/MLIRPolybenchLudcmp.mlir | 0 .../polybench/MLIRPolybenchLudcmpBenchmark.cpp | 0 .../Vectorization/polybench/MLIRPolybenchMvt.mlir | 0 
.../polybench/MLIRPolybenchMvtBenchmark.cpp | 0 .../polybench/MLIRPolybenchNussinov.mlir | 0 .../polybench/MLIRPolybenchNussinovBenchmark.cpp | 0 .../polybench/MLIRPolybenchSeidel2D.mlir | 0 .../polybench/MLIRPolybenchSeidel2DBenchmark.cpp | 0 .../Vectorization/polybench/MLIRPolybenchSymm.mlir | 0 .../polybench/MLIRPolybenchSymmBenchmark.cpp | 0 .../Vectorization/polybench/MLIRPolybenchSyr2k.mlir | 0 .../polybench/MLIRPolybenchSyr2kBenchmark.cpp | 0 .../Vectorization/polybench/MLIRPolybenchSyrk.mlir | 0 .../polybench/MLIRPolybenchSyrkBenchmark.cpp | 0 .../polybench/MLIRPolybenchTrisolv.mlir | 0 .../polybench/MLIRPolybenchTrisolvBenchmark.cpp | 0 .../Vectorization/polybench/MLIRPolybenchTrmm.mlir | 0 .../polybench/MLIRPolybenchTrmmBenchmark.cpp | 0 benchmarks/Vectorization/polybench/Main.cpp | 0 benchmarks/Vectorization/polybench/README.md | 0 benchmarks/Vectorization/polybench/Utils.hpp | 0 .../Vectorization/polybench/polybench_mlir_gen.py | 0 cmake/buddy-benchmark.cmake | 0 cmake/check-simd.cmake | 0 docs/ConvAlgorithms.md | 0 docs/DeepLearningBenchmark.md | 0 docs/GemminiConfig.md | 0 docs/Images/CoefficientsBroadcasting.png | Bin docs/PrepareRVOpenMP.md | 0 requirements.txt | 0 scripts/logs2html.py | 0 scripts/run_docker.sh | 0 .../build_results_crosscompile_summary.log | 0 test_result/deeplearning/build_results_summary.log | 0 test_result/deeplearning/dl-layer-ffn-benchmark.log | 0 .../deeplearning/dl-layer-rmsnorm-benchmark.log | 0 .../dl-layer-selfattention-benchmark.log | 0 .../deeplearning/dl-model-lenet-benchmark.log | 0 .../deeplearning/dl-model-mobilenetv3-benchmark.log | 0 .../deeplearning/dl-model-resnet18-benchmark.log | 0 .../deeplearning/dl-model-tinyllama-benchmark.log | 0 .../deeplearning/dl-model-whisper-benchmark.log | 0 .../dl-op-linalg-arithaddf-benchmark.log | 0 .../dl-op-linalg-arithdivf-benchmark.log | 0 .../dl-op-linalg-arithmulf-benchmark.log | 0 .../dl-op-linalg-arithnegf-benchmark.log | 0 .../dl-op-linalg-arithsubf-benchmark.log | 0 
.../dl-op-linalg-batch-matmul-benchmark.log | 0 .../dl-op-linalg-conv2d-nchw-fchw-benchmark.log | 0 .../dl-op-linalg-conv2d-nhwc-fhwc-benchmark.log | 0 .../dl-op-linalg-conv2d-nhwc-hwcf-benchmark.log | 0 ...-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.log | 0 .../deeplearning/dl-op-linalg-mathexp-benchmark.log | 0 .../dl-op-linalg-mathfpow-benchmark.log | 0 .../dl-op-linalg-mathrsqrt-benchmark.log | 0 .../deeplearning/dl-op-linalg-matmul-benchmark.log | 0 .../dl-op-linalg-pooling-nhwc-sum-benchmark.log | 0 .../dl-op-linalg-reduceaddf-benchmark.log | 0 .../dl-op-linalg-reducemaxf-benchmark.log | 0 .../dl-op-linalg-softmax-exp-sum-div-benchmark.log | 0 .../dl-op-matmul-transpose-b-benchmark.log | 0 .../deeplearning/dl-op-tosa-transpose-benchmark.log | 0 test_result/deeplearning/run_results_summary.log | 0 test_result/geminiprocessing/build.log | 0 test_result/geminiprocessing/cmake_configure.log | 0 test_result/vectorization/vectorization_result.log | 0 thirdparty/README.md | 0 utils/CMakeLists.txt | 0 utils/plots/CMakeLists.txt | 0 utils/plots/Main.cpp | 0 utils/plots/python/plot.py | 0 utils/plots/python/plotools/.gitignore | 0 utils/plots/python/plotools/__init__.py | 0 utils/plots/python/plotools/compare.py | 0 utils/plots/source_dir.h.in | 0 validation/AudioProcessing/AudioValidationLib.cpp | 0 validation/AudioProcessing/CMakeLists.txt | 0 validation/CMakeLists.txt | 0 validation/Python/.gitignore | 0 validation/Python/__init__.py | 0 validation/Python/audio/__init__.py | 0 validation/Python/audio/audio_file.py | 0 validation/Python/audio/audio_test.py | 0 validation/Python/audio/fir.py | 0 validation/Python/main.py | 0 validation/Python/requirements.txt | 0 validation/Python/utils/__init__.py | 0 validation/Python/utils/audio_format.py | 0 validation/Python/utils/lib_path.py | 0 validation/README.md | 0 410 files changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 .clang-format mode change 100644 => 100755 .gitattributes mode change 100644 => 
100755 .github/ISSUE_TEMPLATE/bug_report.md mode change 100644 => 100755 .github/ISSUE_TEMPLATE/feature_request.md rename .github/{workflow => workflows}/bench.yml (100%) mode change 100644 => 100755 mode change 100644 => 100755 .gitignore mode change 100644 => 100755 .gitmodules mode change 100644 => 100755 .style.yapf mode change 100644 => 100755 CMakeLists.txt mode change 100644 => 100755 LICENSE mode change 100644 => 100755 README.md mode change 100644 => 100755 benchmarks/AudioProcessing/Audios/NASA_Mars.wav mode change 100644 => 100755 benchmarks/AudioProcessing/CMakeLists.txt mode change 100644 => 100755 benchmarks/AudioProcessing/Operations/BiquadOp/CMakeLists.txt mode change 100644 => 100755 benchmarks/AudioProcessing/Operations/BiquadOp/MLIRBiquad.mlir mode change 100644 => 100755 benchmarks/AudioProcessing/Operations/BiquadOp/Main.cpp mode change 100644 => 100755 benchmarks/AudioProcessing/Operations/BiquadOp/Utils.hpp mode change 100644 => 100755 benchmarks/AudioProcessing/Operations/CMakeLists.txt mode change 100644 => 100755 benchmarks/AudioProcessing/Operations/FFTOp/CMakeLists.txt mode change 100644 => 100755 benchmarks/AudioProcessing/Operations/FFTOp/KFRFft.cpp mode change 100644 => 100755 benchmarks/AudioProcessing/Operations/FFTOp/Main.cpp mode change 100644 => 100755 benchmarks/AudioProcessing/Operations/FIROp/CMakeLists.txt mode change 100644 => 100755 benchmarks/AudioProcessing/Operations/FIROp/MLIRFIR.mlir mode change 100644 => 100755 benchmarks/AudioProcessing/Operations/FIROp/MLIRFIRTiledVectorization.mlir mode change 100644 => 100755 benchmarks/AudioProcessing/Operations/FIROp/MLIRFIRVectorization.mlir mode change 100644 => 100755 benchmarks/AudioProcessing/Operations/FIROp/Main.cpp mode change 100644 => 100755 benchmarks/AudioProcessing/Operations/FIROp/Utils.hpp mode change 100644 => 100755 benchmarks/AudioProcessing/Operations/IIROp/CMakeLists.txt mode change 100644 => 100755 
benchmarks/AudioProcessing/Operations/IIROp/MLIRIIRScalar.mlir mode change 100644 => 100755 benchmarks/AudioProcessing/Operations/IIROp/MLIRIIRVectorization.mlir mode change 100644 => 100755 benchmarks/AudioProcessing/Operations/IIROp/Main.cpp mode change 100644 => 100755 benchmarks/AudioProcessing/Operations/IIROp/Utils.hpp mode change 100644 => 100755 benchmarks/AudioProcessing/Operations/RFFTOp/CMakeLists.txt mode change 100644 => 100755 benchmarks/AudioProcessing/Operations/RFFTOp/GoogleBenchmarkMain.cpp mode change 100644 => 100755 benchmarks/AudioProcessing/Operations/RFFTOp/RFFT.py mode change 100644 => 100755 benchmarks/AudioProcessing/README.md mode change 100644 => 100755 benchmarks/CMakeLists.txt mode change 100644 => 100755 benchmarks/DeepLearning/CMakeLists.txt mode change 100644 => 100755 benchmarks/DeepLearning/Layers/CMakeLists.txt mode change 100644 => 100755 benchmarks/DeepLearning/Layers/FFN/.gitignore mode change 100644 => 100755 benchmarks/DeepLearning/Layers/FFN/CMakeLists.txt mode change 100644 => 100755 benchmarks/DeepLearning/Layers/FFN/GoogleBenchmarkMain.cpp mode change 100644 => 100755 benchmarks/DeepLearning/Layers/FFN/buddy_ffn_import.py mode change 100644 => 100755 benchmarks/DeepLearning/Layers/RMSNorm/.gitignore mode change 100644 => 100755 benchmarks/DeepLearning/Layers/RMSNorm/CMakeLists.txt mode change 100644 => 100755 benchmarks/DeepLearning/Layers/RMSNorm/GoogleBenchmarkMain.cpp mode change 100644 => 100755 benchmarks/DeepLearning/Layers/RMSNorm/buddy_rmsnorm_import.py mode change 100644 => 100755 benchmarks/DeepLearning/Layers/SelfAttention/.gitignore mode change 100644 => 100755 benchmarks/DeepLearning/Layers/SelfAttention/CMakeLists.txt mode change 100644 => 100755 benchmarks/DeepLearning/Layers/SelfAttention/GoogleBenchmarkMain.cpp mode change 100644 => 100755 benchmarks/DeepLearning/Layers/SelfAttention/buddy_selfattention_import.py mode change 100644 => 100755 benchmarks/DeepLearning/Models/Bert/.gitignore mode change 
100644 => 100755 benchmarks/DeepLearning/Models/Bert/CMakeLists.txt mode change 100644 => 100755 benchmarks/DeepLearning/Models/Bert/GoogleBenchmarkMain.cpp mode change 100644 => 100755 benchmarks/DeepLearning/Models/Bert/buddy_bert_import.py mode change 100644 => 100755 benchmarks/DeepLearning/Models/CMakeLists.txt mode change 100644 => 100755 benchmarks/DeepLearning/Models/LeNet/.gitignore mode change 100644 => 100755 benchmarks/DeepLearning/Models/LeNet/CMakeLists.txt mode change 100644 => 100755 benchmarks/DeepLearning/Models/LeNet/GoogleBenchmarkMain.cpp mode change 100644 => 100755 benchmarks/DeepLearning/Models/LeNet/buddy_lenet_import.py mode change 100644 => 100755 benchmarks/DeepLearning/Models/LeNet/lenet_model.pth mode change 100644 => 100755 benchmarks/DeepLearning/Models/LeNet/model.py mode change 100644 => 100755 benchmarks/DeepLearning/Models/MobileNet-V3/.gitignore mode change 100644 => 100755 benchmarks/DeepLearning/Models/MobileNet-V3/CMakeLists.txt mode change 100644 => 100755 benchmarks/DeepLearning/Models/MobileNet-V3/GoogleBenchmarkMain.cpp mode change 100644 => 100755 benchmarks/DeepLearning/Models/MobileNet-V3/buddy_mobilenetv3_import.py mode change 100644 => 100755 benchmarks/DeepLearning/Models/Resnet18/.gitignore mode change 100644 => 100755 benchmarks/DeepLearning/Models/Resnet18/CMakeLists.txt mode change 100644 => 100755 benchmarks/DeepLearning/Models/Resnet18/GoogleBenchmarkMain.cpp mode change 100644 => 100755 benchmarks/DeepLearning/Models/Resnet18/buddy_resnet18_import.py mode change 100644 => 100755 benchmarks/DeepLearning/Models/TinyLlama/.gitignore mode change 100644 => 100755 benchmarks/DeepLearning/Models/TinyLlama/CMakeLists.txt mode change 100644 => 100755 benchmarks/DeepLearning/Models/TinyLlama/Main.cpp mode change 100644 => 100755 benchmarks/DeepLearning/Models/TinyLlama/Utils.hpp mode change 100644 => 100755 benchmarks/DeepLearning/Models/TinyLlama/buddy_tinyllama_import.py mode change 100644 => 100755 
benchmarks/DeepLearning/Models/Whisper/.gitignore mode change 100644 => 100755 benchmarks/DeepLearning/Models/Whisper/CMakeLists.txt mode change 100644 => 100755 benchmarks/DeepLearning/Models/Whisper/GoogleBenchmarkMain.cpp mode change 100644 => 100755 benchmarks/DeepLearning/Models/Whisper/buddy_whisper_import.py mode change 100644 => 100755 benchmarks/DeepLearning/Ops/ArithAddfOp/ArithAddf.mlir mode change 100644 => 100755 benchmarks/DeepLearning/Ops/ArithAddfOp/CMakeLists.txt mode change 100644 => 100755 benchmarks/DeepLearning/Ops/ArithAddfOp/GoogleBenchmarkMain.cpp mode change 100644 => 100755 benchmarks/DeepLearning/Ops/ArithDivfOp/ArithDivf.mlir mode change 100644 => 100755 benchmarks/DeepLearning/Ops/ArithDivfOp/CMakeLists.txt mode change 100644 => 100755 benchmarks/DeepLearning/Ops/ArithDivfOp/GoogleBenchmarkMain.cpp mode change 100644 => 100755 benchmarks/DeepLearning/Ops/ArithMulfOp/ArithMulf.mlir mode change 100644 => 100755 benchmarks/DeepLearning/Ops/ArithMulfOp/CMakeLists.txt mode change 100644 => 100755 benchmarks/DeepLearning/Ops/ArithMulfOp/GoogleBenchmarkMain.cpp mode change 100644 => 100755 benchmarks/DeepLearning/Ops/ArithNegfOp/ArithNegf.mlir mode change 100644 => 100755 benchmarks/DeepLearning/Ops/ArithNegfOp/CMakeLists.txt mode change 100644 => 100755 benchmarks/DeepLearning/Ops/ArithNegfOp/GoogleBenchmarkMain.cpp mode change 100644 => 100755 benchmarks/DeepLearning/Ops/ArithSubfOp/ArithSubf.mlir mode change 100644 => 100755 benchmarks/DeepLearning/Ops/ArithSubfOp/CMakeLists.txt mode change 100644 => 100755 benchmarks/DeepLearning/Ops/ArithSubfOp/GoogleBenchmarkMain.cpp mode change 100644 => 100755 benchmarks/DeepLearning/Ops/BatchMatMulOp/BatchMatMul.mlir mode change 100644 => 100755 benchmarks/DeepLearning/Ops/BatchMatMulOp/BatchMatMulBroadcast.mlir mode change 100644 => 100755 benchmarks/DeepLearning/Ops/BatchMatMulOp/BatchMatMulSCF.mlir mode change 100644 => 100755 benchmarks/DeepLearning/Ops/BatchMatMulOp/BatchMatMulVec.mlir mode 
change 100644 => 100755 benchmarks/DeepLearning/Ops/BatchMatMulOp/BatchMatMulVecTile.mlir mode change 100644 => 100755 benchmarks/DeepLearning/Ops/BatchMatMulOp/CMakeLists.txt mode change 100644 => 100755 benchmarks/DeepLearning/Ops/BatchMatMulOp/Main.cpp mode change 100644 => 100755 benchmarks/DeepLearning/Ops/BatchMatMulOp/Utils.hpp mode change 100644 => 100755 benchmarks/DeepLearning/Ops/CMakeLists.txt mode change 100644 => 100755 benchmarks/DeepLearning/Ops/Conv2DNchwFchwOp/CMakeLists.txt mode change 100644 => 100755 benchmarks/DeepLearning/Ops/Conv2DNchwFchwOp/Conv2DNchwFchw.mlir mode change 100644 => 100755 benchmarks/DeepLearning/Ops/Conv2DNchwFchwOp/GoogleBenchmarkMain.cpp mode change 100644 => 100755 benchmarks/DeepLearning/Ops/Conv2DNchwFchwOp/conv2d-nchw-fchw-im2col.mlir mode change 100644 => 100755 benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcInt32Op/CMakeLists.txt mode change 100644 => 100755 benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcInt32Op/Conv2DNhwcFhwc.mlir mode change 100644 => 100755 benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcInt32Op/Conv2DNhwcFhwcVec.mlir mode change 100644 => 100755 benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcInt32Op/Conv2DNhwcFhwcVecRVV.mlir mode change 100644 => 100755 benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcInt32Op/Main.cpp mode change 100644 => 100755 benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcInt32Op/Utils.hpp mode change 100644 => 100755 benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcOp/CMakeLists.txt mode change 100644 => 100755 benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcOp/Conv2DNhwcFhwc.mlir mode change 100644 => 100755 benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcOp/Conv2DNhwcFhwcVec.mlir mode change 100644 => 100755 benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcOp/Conv2DNhwcFhwcVecTile.mlir mode change 100644 => 100755 benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcOp/Main.cpp mode change 100644 => 100755 benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcOp/Utils.hpp mode change 100644 => 100755 
benchmarks/DeepLearning/Ops/Conv2DNhwcHwcfOp/CMakeLists.txt mode change 100644 => 100755 benchmarks/DeepLearning/Ops/Conv2DNhwcHwcfOp/Conv2DNhwcHwcf.mlir mode change 100644 => 100755 benchmarks/DeepLearning/Ops/Conv2DNhwcHwcfOp/GoogleBenchmarkMain.cpp mode change 100644 => 100755 benchmarks/DeepLearning/Ops/DepthwiseConv2DNhwcHwcOp/CMakeLists.txt mode change 100644 => 100755 benchmarks/DeepLearning/Ops/DepthwiseConv2DNhwcHwcOp/DepthwiseConv2DNhwcHwc.mlir mode change 100644 => 100755 benchmarks/DeepLearning/Ops/DepthwiseConv2DNhwcHwcOp/DepthwiseConv2DNhwcHwcVec.mlir mode change 100644 => 100755 benchmarks/DeepLearning/Ops/DepthwiseConv2DNhwcHwcOp/Main.cpp mode change 100644 => 100755 benchmarks/DeepLearning/Ops/DepthwiseConv2DNhwcHwcOp/Utils.hpp mode change 100644 => 100755 benchmarks/DeepLearning/Ops/MatMulOp/CMakeLists.txt mode change 100644 => 100755 benchmarks/DeepLearning/Ops/MatMulOp/Main.cpp mode change 100644 => 100755 benchmarks/DeepLearning/Ops/MatMulOp/Utils.hpp mode change 100644 => 100755 benchmarks/DeepLearning/Ops/MatMulOp/matmul.mlir mode change 100644 => 100755 benchmarks/DeepLearning/Ops/MatMulTransposeBOp/CMakeLists.txt mode change 100644 => 100755 benchmarks/DeepLearning/Ops/MatMulTransposeBOp/Main.cpp mode change 100644 => 100755 benchmarks/DeepLearning/Ops/MatMulTransposeBOp/MatMulTransposeB.mlir mode change 100644 => 100755 benchmarks/DeepLearning/Ops/MatMulTransposeBOp/Utils.hpp mode change 100644 => 100755 benchmarks/DeepLearning/Ops/MathExpOp/CMakeLists.txt mode change 100644 => 100755 benchmarks/DeepLearning/Ops/MathExpOp/GoogleBenchmarkMain.cpp mode change 100644 => 100755 benchmarks/DeepLearning/Ops/MathExpOp/MathExp.mlir mode change 100644 => 100755 benchmarks/DeepLearning/Ops/MathFpowOp/CMakeLists.txt mode change 100644 => 100755 benchmarks/DeepLearning/Ops/MathFpowOp/GoogleBenchmarkMain.cpp mode change 100644 => 100755 benchmarks/DeepLearning/Ops/MathFpowOp/MathFpow.mlir mode change 100644 => 100755 
benchmarks/DeepLearning/Ops/MathRsqrtOp/CMakeLists.txt mode change 100644 => 100755 benchmarks/DeepLearning/Ops/MathRsqrtOp/GoogleBenchmarkMain.cpp mode change 100644 => 100755 benchmarks/DeepLearning/Ops/MathRsqrtOp/MathRsqrt.mlir mode change 100644 => 100755 benchmarks/DeepLearning/Ops/PoolingNhwcSumOp/CMakeLists.txt mode change 100644 => 100755 benchmarks/DeepLearning/Ops/PoolingNhwcSumOp/GoogleBenchmarkMain.cpp mode change 100644 => 100755 benchmarks/DeepLearning/Ops/PoolingNhwcSumOp/PoolingNhwcSum.mlir mode change 100644 => 100755 benchmarks/DeepLearning/Ops/ReduceAddfOp/CMakeLists.txt mode change 100644 => 100755 benchmarks/DeepLearning/Ops/ReduceAddfOp/GoogleBenchmarkMain.cpp mode change 100644 => 100755 benchmarks/DeepLearning/Ops/ReduceAddfOp/ReduceAddf.mlir mode change 100644 => 100755 benchmarks/DeepLearning/Ops/ReduceMaxfOp/CMakeLists.txt mode change 100644 => 100755 benchmarks/DeepLearning/Ops/ReduceMaxfOp/GoogleBenchmarkMain.cpp mode change 100644 => 100755 benchmarks/DeepLearning/Ops/ReduceMaxfOp/ReduceMaxf.mlir mode change 100644 => 100755 benchmarks/DeepLearning/Ops/SoftmaxExpSumDivOp/CMakeLists.txt mode change 100644 => 100755 benchmarks/DeepLearning/Ops/SoftmaxExpSumDivOp/GoogleBenchmarkMain.cpp mode change 100644 => 100755 benchmarks/DeepLearning/Ops/SoftmaxExpSumDivOp/SoftmaxExpSumDiv.mlir mode change 100644 => 100755 benchmarks/DeepLearning/Ops/TransposeOp/CMakeLists.txt mode change 100644 => 100755 benchmarks/DeepLearning/Ops/TransposeOp/Main.cpp mode change 100644 => 100755 benchmarks/DeepLearning/Ops/TransposeOp/Transpose2D.mlir mode change 100644 => 100755 benchmarks/DeepLearning/Ops/TransposeOp/Utils.hpp mode change 100644 => 100755 benchmarks/DeepLearning/README.md mode change 100644 => 100755 benchmarks/Gemmini/CMakeLists.txt mode change 100644 => 100755 benchmarks/Gemmini/Ops/CMakeLists.txt mode change 100644 => 100755 benchmarks/Gemmini/Ops/MatMulOp/CMakeLists.txt mode change 100644 => 100755 
benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c mode change 100644 => 100755 benchmarks/Gemmini/Ops/MatMulOp/ExoUtils.h mode change 100644 => 100755 benchmarks/Gemmini/Ops/MatMulOp/Main.cpp mode change 100644 => 100755 benchmarks/Gemmini/Ops/MatMulOp/matmul.mlir mode change 100644 => 100755 benchmarks/Gemmini/README.md mode change 100644 => 100755 benchmarks/Gemmini/ResNet-101/.gitattributes mode change 100644 => 100755 benchmarks/Gemmini/ResNet-101/CMakeLists.txt mode change 100644 => 100755 benchmarks/Gemmini/ResNet-101/CRunnerUtils.cpp mode change 100644 => 100755 benchmarks/Gemmini/ResNet-101/Main.cpp mode change 100644 => 100755 benchmarks/Gemmini/ResNet-101/ResNet101.mlir mode change 100644 => 100755 benchmarks/Gemmini/ResNet-101/images/Cat.h mode change 100644 => 100755 benchmarks/Gemmini/ResNet-101/images/Cat.jpg mode change 100644 => 100755 benchmarks/Gemmini/ResNet-101/include/Labels.h mode change 100644 => 100755 benchmarks/Gemmini/Utils.h mode change 100644 => 100755 benchmarks/ImageProcessing/BuddyConv2DBenchmark.cpp mode change 100644 => 100755 benchmarks/ImageProcessing/BuddyCorr2DBenchmark.cpp mode change 100644 => 100755 benchmarks/ImageProcessing/BuddyMorph2DBenchmark.cpp mode change 100644 => 100755 benchmarks/ImageProcessing/BuddyResize2DBenchmark.cpp mode change 100644 => 100755 benchmarks/ImageProcessing/CMakeLists.txt mode change 100644 => 100755 benchmarks/ImageProcessing/EigenConvolve2DBenchmark.cpp mode change 100644 => 100755 benchmarks/ImageProcessing/Images/YuTu.png mode change 100644 => 100755 benchmarks/ImageProcessing/Images/YuTu1022.png mode change 100644 => 100755 benchmarks/ImageProcessing/Images/YuTu1024.png mode change 100644 => 100755 benchmarks/ImageProcessing/Images/YuTu128.png mode change 100644 => 100755 benchmarks/ImageProcessing/Images/YuTu16.png mode change 100644 => 100755 benchmarks/ImageProcessing/Images/YuTu18.png mode change 100644 => 100755 benchmarks/ImageProcessing/Images/YuTu256.png mode change 100644 => 100755 
benchmarks/ImageProcessing/Images/YuTu32.png mode change 100644 => 100755 benchmarks/ImageProcessing/Images/YuTu4.png mode change 100644 => 100755 benchmarks/ImageProcessing/Images/YuTu512.png mode change 100644 => 100755 benchmarks/ImageProcessing/Images/YuTu6.png mode change 100644 => 100755 benchmarks/ImageProcessing/Images/YuTu64.png mode change 100644 => 100755 benchmarks/ImageProcessing/Images/YuTu8.png mode change 100644 => 100755 benchmarks/ImageProcessing/MLIRConv2D.mlir mode change 100644 => 100755 benchmarks/ImageProcessing/MLIRConv2DBenchmark.cpp mode change 100644 => 100755 benchmarks/ImageProcessing/Main.cpp mode change 100644 => 100755 benchmarks/ImageProcessing/OpenCVFilter2DBenchmark.cpp mode change 100644 => 100755 benchmarks/ImageProcessing/OpenCVMorph2DBenchmark.cpp mode change 100644 => 100755 benchmarks/ImageProcessing/OpenCVResize2DBenchmark.cpp mode change 100644 => 100755 benchmarks/ImageProcessing/include/Kernels.h mode change 100644 => 100755 benchmarks/OpOptimization/CMakeLists.txt mode change 100644 => 100755 benchmarks/OpOptimization/Conv2dNchwFchw/CMakeLists.txt mode change 100644 => 100755 benchmarks/OpOptimization/Conv2dNchwFchw/Conv2DNchwFchw.mlir mode change 100644 => 100755 benchmarks/OpOptimization/Conv2dNchwFchw/Conv2DNchwFchwBenchmark.cpp mode change 100644 => 100755 benchmarks/OpOptimization/Conv2dNchwFchw/Conv2DNchwFchwBroadcast.mlir mode change 100644 => 100755 benchmarks/OpOptimization/Conv2dNchwFchw/Conv2DNchwFchwIm2col.mlir mode change 100644 => 100755 benchmarks/OpOptimization/Conv2dNchwFchw/Conv2DNchwFchwWinagrad.mlir mode change 100644 => 100755 benchmarks/OpOptimization/Conv2dNchwFchw/Main.cpp mode change 100644 => 100755 benchmarks/OpOptimization/MatMul/CMakeLists.txt mode change 100644 => 100755 benchmarks/OpOptimization/MatMul/Main.cpp mode change 100644 => 100755 benchmarks/OpOptimization/MatMul/MatMul.mlir mode change 100644 => 100755 benchmarks/OpOptimization/MatMul/MatMulBenchmark.cpp mode change 100644 => 
100755 benchmarks/OpOptimization/MatMul/MatMulBroadcast.mlir mode change 100644 => 100755 benchmarks/OpOptimization/MatMul/MatMulTransform.mlir mode change 100644 => 100755 benchmarks/OpOptimization/MatMul/TVM/.gitignore mode change 100644 => 100755 benchmarks/OpOptimization/MatMul/TVM/main.py mode change 100644 => 100755 benchmarks/OpOptimization/MatMul/TVM/matmul_autotvm.py mode change 100644 => 100755 benchmarks/OpOptimization/MatMul/TVM/matmul_manual.py mode change 100644 => 100755 benchmarks/Vectorization/CMakeLists.txt mode change 100644 => 100755 benchmarks/Vectorization/MLIRMatMul.mlir mode change 100644 => 100755 benchmarks/Vectorization/MLIRMatMulBenchmark.cpp mode change 100644 => 100755 benchmarks/Vectorization/MLIRMatVec.mlir mode change 100644 => 100755 benchmarks/Vectorization/MLIRMatVecBenchmark.cpp mode change 100644 => 100755 benchmarks/Vectorization/Main.cpp mode change 100644 => 100755 benchmarks/Vectorization/gccloops/CMakeLists.txt mode change 100644 => 100755 benchmarks/Vectorization/gccloops/MLIRGccLoopsEx1.mlir mode change 100644 => 100755 benchmarks/Vectorization/gccloops/MLIRGccLoopsEx10a.mlir mode change 100644 => 100755 benchmarks/Vectorization/gccloops/MLIRGccLoopsEx10aBenchmark.cpp mode change 100644 => 100755 benchmarks/Vectorization/gccloops/MLIRGccLoopsEx10b.mlir mode change 100644 => 100755 benchmarks/Vectorization/gccloops/MLIRGccLoopsEx10bBenchmark.cpp mode change 100644 => 100755 benchmarks/Vectorization/gccloops/MLIRGccLoopsEx11.mlir mode change 100644 => 100755 benchmarks/Vectorization/gccloops/MLIRGccLoopsEx11Benchmark.cpp mode change 100644 => 100755 benchmarks/Vectorization/gccloops/MLIRGccLoopsEx12.mlir mode change 100644 => 100755 benchmarks/Vectorization/gccloops/MLIRGccLoopsEx12Benchmark.cpp mode change 100644 => 100755 benchmarks/Vectorization/gccloops/MLIRGccLoopsEx13.mlir mode change 100644 => 100755 benchmarks/Vectorization/gccloops/MLIRGccLoopsEx13Benchmark.cpp mode change 100644 => 100755 
benchmarks/Vectorization/gccloops/MLIRGccLoopsEx14.mlir mode change 100644 => 100755 benchmarks/Vectorization/gccloops/MLIRGccLoopsEx14Benchmark.cpp mode change 100644 => 100755 benchmarks/Vectorization/gccloops/MLIRGccLoopsEx1Benchmark.cpp mode change 100644 => 100755 benchmarks/Vectorization/gccloops/MLIRGccLoopsEx21.mlir mode change 100644 => 100755 benchmarks/Vectorization/gccloops/MLIRGccLoopsEx21Benchmark.cpp mode change 100644 => 100755 benchmarks/Vectorization/gccloops/MLIRGccLoopsEx23.mlir mode change 100644 => 100755 benchmarks/Vectorization/gccloops/MLIRGccLoopsEx23Benchmark.cpp mode change 100644 => 100755 benchmarks/Vectorization/gccloops/MLIRGccLoopsEx24.mlir mode change 100644 => 100755 benchmarks/Vectorization/gccloops/MLIRGccLoopsEx24Benchmark.cpp mode change 100644 => 100755 benchmarks/Vectorization/gccloops/MLIRGccLoopsEx25.mlir mode change 100644 => 100755 benchmarks/Vectorization/gccloops/MLIRGccLoopsEx25Benchmark.cpp mode change 100644 => 100755 benchmarks/Vectorization/gccloops/MLIRGccLoopsEx2a.mlir mode change 100644 => 100755 benchmarks/Vectorization/gccloops/MLIRGccLoopsEx2aBenchmark.cpp mode change 100644 => 100755 benchmarks/Vectorization/gccloops/MLIRGccLoopsEx2b.mlir mode change 100644 => 100755 benchmarks/Vectorization/gccloops/MLIRGccLoopsEx2bBenchmark.cpp mode change 100644 => 100755 benchmarks/Vectorization/gccloops/MLIRGccLoopsEx3.mlir mode change 100644 => 100755 benchmarks/Vectorization/gccloops/MLIRGccLoopsEx3Benchmark.cpp mode change 100644 => 100755 benchmarks/Vectorization/gccloops/MLIRGccLoopsEx4a.mlir mode change 100644 => 100755 benchmarks/Vectorization/gccloops/MLIRGccLoopsEx4aBenchmark.cpp mode change 100644 => 100755 benchmarks/Vectorization/gccloops/MLIRGccLoopsEx4b.mlir mode change 100644 => 100755 benchmarks/Vectorization/gccloops/MLIRGccLoopsEx4bBenchmark.cpp mode change 100644 => 100755 benchmarks/Vectorization/gccloops/MLIRGccLoopsEx4c.mlir mode change 100644 => 100755 
benchmarks/Vectorization/gccloops/MLIRGccLoopsEx4cBenchmark.cpp mode change 100644 => 100755 benchmarks/Vectorization/gccloops/MLIRGccLoopsEx7.mlir mode change 100644 => 100755 benchmarks/Vectorization/gccloops/MLIRGccLoopsEx7Benchmark.cpp mode change 100644 => 100755 benchmarks/Vectorization/gccloops/MLIRGccLoopsEx8.mlir mode change 100644 => 100755 benchmarks/Vectorization/gccloops/MLIRGccLoopsEx8Benchmark.cpp mode change 100644 => 100755 benchmarks/Vectorization/gccloops/MLIRGccLoopsEx9.mlir mode change 100644 => 100755 benchmarks/Vectorization/gccloops/MLIRGccLoopsEx9Benchmark.cpp mode change 100644 => 100755 benchmarks/Vectorization/gccloops/Main.cpp mode change 100644 => 100755 benchmarks/Vectorization/linpackc/CMakeLists.txt mode change 100644 => 100755 benchmarks/Vectorization/linpackc/MLIRLinpackCDaxpyBenchmark.cpp mode change 100644 => 100755 benchmarks/Vectorization/linpackc/MLIRLinpackCDaxpyRollF32.mlir mode change 100644 => 100755 benchmarks/Vectorization/linpackc/MLIRLinpackCDaxpyRollF64.mlir mode change 100644 => 100755 benchmarks/Vectorization/linpackc/MLIRLinpackCDaxpyUnrollF32.mlir mode change 100644 => 100755 benchmarks/Vectorization/linpackc/MLIRLinpackCDaxpyUnrollF64.mlir mode change 100644 => 100755 benchmarks/Vectorization/linpackc/Main.cpp mode change 100644 => 100755 benchmarks/Vectorization/polybench/CMakeLists.txt mode change 100644 => 100755 benchmarks/Vectorization/polybench/MLIRPolybench2mm.mlir mode change 100644 => 100755 benchmarks/Vectorization/polybench/MLIRPolybench2mmBenchmark.cpp mode change 100644 => 100755 benchmarks/Vectorization/polybench/MLIRPolybench3mm.mlir mode change 100644 => 100755 benchmarks/Vectorization/polybench/MLIRPolybench3mmBenchmark.cpp mode change 100644 => 100755 benchmarks/Vectorization/polybench/MLIRPolybenchAdi.mlir mode change 100644 => 100755 benchmarks/Vectorization/polybench/MLIRPolybenchAdiBenchmark.cpp mode change 100644 => 100755 benchmarks/Vectorization/polybench/MLIRPolybenchAtax.mlir mode 
change 100644 => 100755 benchmarks/Vectorization/polybench/MLIRPolybenchAtaxBenchmark.cpp mode change 100644 => 100755 benchmarks/Vectorization/polybench/MLIRPolybenchBicg.mlir mode change 100644 => 100755 benchmarks/Vectorization/polybench/MLIRPolybenchBicgBenchmark.cpp mode change 100644 => 100755 benchmarks/Vectorization/polybench/MLIRPolybenchCholesky.mlir mode change 100644 => 100755 benchmarks/Vectorization/polybench/MLIRPolybenchCholeskyBenchmark.cpp mode change 100644 => 100755 benchmarks/Vectorization/polybench/MLIRPolybenchCorrelation.mlir mode change 100644 => 100755 benchmarks/Vectorization/polybench/MLIRPolybenchCorrelationBenchmark.cpp mode change 100644 => 100755 benchmarks/Vectorization/polybench/MLIRPolybenchCovariance.mlir mode change 100644 => 100755 benchmarks/Vectorization/polybench/MLIRPolybenchCovarianceBenchmark.cpp mode change 100644 => 100755 benchmarks/Vectorization/polybench/MLIRPolybenchDeriche.mlir mode change 100644 => 100755 benchmarks/Vectorization/polybench/MLIRPolybenchDericheBenchmark.cpp mode change 100644 => 100755 benchmarks/Vectorization/polybench/MLIRPolybenchDoitgen.mlir mode change 100644 => 100755 benchmarks/Vectorization/polybench/MLIRPolybenchDoitgenBenchmark.cpp mode change 100644 => 100755 benchmarks/Vectorization/polybench/MLIRPolybenchDurbin.mlir mode change 100644 => 100755 benchmarks/Vectorization/polybench/MLIRPolybenchDurbinBenchmark.cpp mode change 100644 => 100755 benchmarks/Vectorization/polybench/MLIRPolybenchFdtd2D.mlir mode change 100644 => 100755 benchmarks/Vectorization/polybench/MLIRPolybenchFdtd2DBenchmark.cpp mode change 100644 => 100755 benchmarks/Vectorization/polybench/MLIRPolybenchFloydWarshall.mlir mode change 100644 => 100755 benchmarks/Vectorization/polybench/MLIRPolybenchFloydWarshallBenchmark.cpp mode change 100644 => 100755 benchmarks/Vectorization/polybench/MLIRPolybenchGemm.mlir mode change 100644 => 100755 benchmarks/Vectorization/polybench/MLIRPolybenchGemmBenchmark.cpp mode change 
100644 => 100755 benchmarks/Vectorization/polybench/MLIRPolybenchGemver.mlir mode change 100644 => 100755 benchmarks/Vectorization/polybench/MLIRPolybenchGemverBenchmark.cpp mode change 100644 => 100755 benchmarks/Vectorization/polybench/MLIRPolybenchGesummv.mlir mode change 100644 => 100755 benchmarks/Vectorization/polybench/MLIRPolybenchGesummvBenchmark.cpp mode change 100644 => 100755 benchmarks/Vectorization/polybench/MLIRPolybenchGramschmidt.mlir mode change 100644 => 100755 benchmarks/Vectorization/polybench/MLIRPolybenchGramschmidtBenchmark.cpp mode change 100644 => 100755 benchmarks/Vectorization/polybench/MLIRPolybenchHeat3D.mlir mode change 100644 => 100755 benchmarks/Vectorization/polybench/MLIRPolybenchHeat3DBenchmark.cpp mode change 100644 => 100755 benchmarks/Vectorization/polybench/MLIRPolybenchJacobi1D.mlir mode change 100644 => 100755 benchmarks/Vectorization/polybench/MLIRPolybenchJacobi1DBenchmark.cpp mode change 100644 => 100755 benchmarks/Vectorization/polybench/MLIRPolybenchJacobi2D.mlir mode change 100644 => 100755 benchmarks/Vectorization/polybench/MLIRPolybenchJacobi2DBenchmark.cpp mode change 100644 => 100755 benchmarks/Vectorization/polybench/MLIRPolybenchLu.mlir mode change 100644 => 100755 benchmarks/Vectorization/polybench/MLIRPolybenchLuBenchmark.cpp mode change 100644 => 100755 benchmarks/Vectorization/polybench/MLIRPolybenchLudcmp.mlir mode change 100644 => 100755 benchmarks/Vectorization/polybench/MLIRPolybenchLudcmpBenchmark.cpp mode change 100644 => 100755 benchmarks/Vectorization/polybench/MLIRPolybenchMvt.mlir mode change 100644 => 100755 benchmarks/Vectorization/polybench/MLIRPolybenchMvtBenchmark.cpp mode change 100644 => 100755 benchmarks/Vectorization/polybench/MLIRPolybenchNussinov.mlir mode change 100644 => 100755 benchmarks/Vectorization/polybench/MLIRPolybenchNussinovBenchmark.cpp mode change 100644 => 100755 benchmarks/Vectorization/polybench/MLIRPolybenchSeidel2D.mlir mode change 100644 => 100755 
benchmarks/Vectorization/polybench/MLIRPolybenchSeidel2DBenchmark.cpp mode change 100644 => 100755 benchmarks/Vectorization/polybench/MLIRPolybenchSymm.mlir mode change 100644 => 100755 benchmarks/Vectorization/polybench/MLIRPolybenchSymmBenchmark.cpp mode change 100644 => 100755 benchmarks/Vectorization/polybench/MLIRPolybenchSyr2k.mlir mode change 100644 => 100755 benchmarks/Vectorization/polybench/MLIRPolybenchSyr2kBenchmark.cpp mode change 100644 => 100755 benchmarks/Vectorization/polybench/MLIRPolybenchSyrk.mlir mode change 100644 => 100755 benchmarks/Vectorization/polybench/MLIRPolybenchSyrkBenchmark.cpp mode change 100644 => 100755 benchmarks/Vectorization/polybench/MLIRPolybenchTrisolv.mlir mode change 100644 => 100755 benchmarks/Vectorization/polybench/MLIRPolybenchTrisolvBenchmark.cpp mode change 100644 => 100755 benchmarks/Vectorization/polybench/MLIRPolybenchTrmm.mlir mode change 100644 => 100755 benchmarks/Vectorization/polybench/MLIRPolybenchTrmmBenchmark.cpp mode change 100644 => 100755 benchmarks/Vectorization/polybench/Main.cpp mode change 100644 => 100755 benchmarks/Vectorization/polybench/README.md mode change 100644 => 100755 benchmarks/Vectorization/polybench/Utils.hpp mode change 100644 => 100755 benchmarks/Vectorization/polybench/polybench_mlir_gen.py mode change 100644 => 100755 cmake/buddy-benchmark.cmake mode change 100644 => 100755 cmake/check-simd.cmake mode change 100644 => 100755 docs/ConvAlgorithms.md mode change 100644 => 100755 docs/DeepLearningBenchmark.md mode change 100644 => 100755 docs/GemminiConfig.md mode change 100644 => 100755 docs/Images/CoefficientsBroadcasting.png mode change 100644 => 100755 docs/PrepareRVOpenMP.md mode change 100644 => 100755 requirements.txt mode change 100644 => 100755 scripts/logs2html.py mode change 100644 => 100755 scripts/run_docker.sh mode change 100644 => 100755 test_result/deeplearning/build_results_crosscompile_summary.log mode change 100644 => 100755 
test_result/deeplearning/build_results_summary.log mode change 100644 => 100755 test_result/deeplearning/dl-layer-ffn-benchmark.log mode change 100644 => 100755 test_result/deeplearning/dl-layer-rmsnorm-benchmark.log mode change 100644 => 100755 test_result/deeplearning/dl-layer-selfattention-benchmark.log mode change 100644 => 100755 test_result/deeplearning/dl-model-lenet-benchmark.log mode change 100644 => 100755 test_result/deeplearning/dl-model-mobilenetv3-benchmark.log mode change 100644 => 100755 test_result/deeplearning/dl-model-resnet18-benchmark.log mode change 100644 => 100755 test_result/deeplearning/dl-model-tinyllama-benchmark.log mode change 100644 => 100755 test_result/deeplearning/dl-model-whisper-benchmark.log mode change 100644 => 100755 test_result/deeplearning/dl-op-linalg-arithaddf-benchmark.log mode change 100644 => 100755 test_result/deeplearning/dl-op-linalg-arithdivf-benchmark.log mode change 100644 => 100755 test_result/deeplearning/dl-op-linalg-arithmulf-benchmark.log mode change 100644 => 100755 test_result/deeplearning/dl-op-linalg-arithnegf-benchmark.log mode change 100644 => 100755 test_result/deeplearning/dl-op-linalg-arithsubf-benchmark.log mode change 100644 => 100755 test_result/deeplearning/dl-op-linalg-batch-matmul-benchmark.log mode change 100644 => 100755 test_result/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.log mode change 100644 => 100755 test_result/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.log mode change 100644 => 100755 test_result/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.log mode change 100644 => 100755 test_result/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.log mode change 100644 => 100755 test_result/deeplearning/dl-op-linalg-mathexp-benchmark.log mode change 100644 => 100755 test_result/deeplearning/dl-op-linalg-mathfpow-benchmark.log mode change 100644 => 100755 test_result/deeplearning/dl-op-linalg-mathrsqrt-benchmark.log mode change 100644 => 100755 
test_result/deeplearning/dl-op-linalg-matmul-benchmark.log mode change 100644 => 100755 test_result/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.log mode change 100644 => 100755 test_result/deeplearning/dl-op-linalg-reduceaddf-benchmark.log mode change 100644 => 100755 test_result/deeplearning/dl-op-linalg-reducemaxf-benchmark.log mode change 100644 => 100755 test_result/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.log mode change 100644 => 100755 test_result/deeplearning/dl-op-matmul-transpose-b-benchmark.log mode change 100644 => 100755 test_result/deeplearning/dl-op-tosa-transpose-benchmark.log mode change 100644 => 100755 test_result/deeplearning/run_results_summary.log mode change 100644 => 100755 test_result/geminiprocessing/build.log mode change 100644 => 100755 test_result/geminiprocessing/cmake_configure.log mode change 100644 => 100755 test_result/vectorization/vectorization_result.log mode change 100644 => 100755 thirdparty/README.md mode change 100644 => 100755 utils/CMakeLists.txt mode change 100644 => 100755 utils/plots/CMakeLists.txt mode change 100644 => 100755 utils/plots/Main.cpp mode change 100644 => 100755 utils/plots/python/plot.py mode change 100644 => 100755 utils/plots/python/plotools/.gitignore mode change 100644 => 100755 utils/plots/python/plotools/__init__.py mode change 100644 => 100755 utils/plots/python/plotools/compare.py mode change 100644 => 100755 utils/plots/source_dir.h.in mode change 100644 => 100755 validation/AudioProcessing/AudioValidationLib.cpp mode change 100644 => 100755 validation/AudioProcessing/CMakeLists.txt mode change 100644 => 100755 validation/CMakeLists.txt mode change 100644 => 100755 validation/Python/.gitignore mode change 100644 => 100755 validation/Python/__init__.py mode change 100644 => 100755 validation/Python/audio/__init__.py mode change 100644 => 100755 validation/Python/audio/audio_file.py mode change 100644 => 100755 validation/Python/audio/audio_test.py mode change 100644 => 
100755 validation/Python/audio/fir.py mode change 100644 => 100755 validation/Python/main.py mode change 100644 => 100755 validation/Python/requirements.txt mode change 100644 => 100755 validation/Python/utils/__init__.py mode change 100644 => 100755 validation/Python/utils/audio_format.py mode change 100644 => 100755 validation/Python/utils/lib_path.py mode change 100644 => 100755 validation/README.md diff --git a/.clang-format b/.clang-format old mode 100644 new mode 100755 diff --git a/.gitattributes b/.gitattributes old mode 100644 new mode 100755 diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md old mode 100644 new mode 100755 diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md old mode 100644 new mode 100755 diff --git a/.github/workflow/bench.yml b/.github/workflows/bench.yml old mode 100644 new mode 100755 similarity index 100% rename from .github/workflow/bench.yml rename to .github/workflows/bench.yml diff --git a/.gitignore b/.gitignore old mode 100644 new mode 100755 diff --git a/.gitmodules b/.gitmodules old mode 100644 new mode 100755 diff --git a/.style.yapf b/.style.yapf old mode 100644 new mode 100755 diff --git a/CMakeLists.txt b/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/LICENSE b/LICENSE old mode 100644 new mode 100755 diff --git a/README.md b/README.md old mode 100644 new mode 100755 diff --git a/benchmarks/AudioProcessing/Audios/NASA_Mars.wav b/benchmarks/AudioProcessing/Audios/NASA_Mars.wav old mode 100644 new mode 100755 diff --git a/benchmarks/AudioProcessing/CMakeLists.txt b/benchmarks/AudioProcessing/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/AudioProcessing/Operations/BiquadOp/CMakeLists.txt b/benchmarks/AudioProcessing/Operations/BiquadOp/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/AudioProcessing/Operations/BiquadOp/MLIRBiquad.mlir 
b/benchmarks/AudioProcessing/Operations/BiquadOp/MLIRBiquad.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/AudioProcessing/Operations/BiquadOp/Main.cpp b/benchmarks/AudioProcessing/Operations/BiquadOp/Main.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/AudioProcessing/Operations/BiquadOp/Utils.hpp b/benchmarks/AudioProcessing/Operations/BiquadOp/Utils.hpp old mode 100644 new mode 100755 diff --git a/benchmarks/AudioProcessing/Operations/CMakeLists.txt b/benchmarks/AudioProcessing/Operations/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/AudioProcessing/Operations/FFTOp/CMakeLists.txt b/benchmarks/AudioProcessing/Operations/FFTOp/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/AudioProcessing/Operations/FFTOp/KFRFft.cpp b/benchmarks/AudioProcessing/Operations/FFTOp/KFRFft.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/AudioProcessing/Operations/FFTOp/Main.cpp b/benchmarks/AudioProcessing/Operations/FFTOp/Main.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/AudioProcessing/Operations/FIROp/CMakeLists.txt b/benchmarks/AudioProcessing/Operations/FIROp/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/AudioProcessing/Operations/FIROp/MLIRFIR.mlir b/benchmarks/AudioProcessing/Operations/FIROp/MLIRFIR.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/AudioProcessing/Operations/FIROp/MLIRFIRTiledVectorization.mlir b/benchmarks/AudioProcessing/Operations/FIROp/MLIRFIRTiledVectorization.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/AudioProcessing/Operations/FIROp/MLIRFIRVectorization.mlir b/benchmarks/AudioProcessing/Operations/FIROp/MLIRFIRVectorization.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/AudioProcessing/Operations/FIROp/Main.cpp b/benchmarks/AudioProcessing/Operations/FIROp/Main.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/AudioProcessing/Operations/FIROp/Utils.hpp 
b/benchmarks/AudioProcessing/Operations/FIROp/Utils.hpp old mode 100644 new mode 100755 diff --git a/benchmarks/AudioProcessing/Operations/IIROp/CMakeLists.txt b/benchmarks/AudioProcessing/Operations/IIROp/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/AudioProcessing/Operations/IIROp/MLIRIIRScalar.mlir b/benchmarks/AudioProcessing/Operations/IIROp/MLIRIIRScalar.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/AudioProcessing/Operations/IIROp/MLIRIIRVectorization.mlir b/benchmarks/AudioProcessing/Operations/IIROp/MLIRIIRVectorization.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/AudioProcessing/Operations/IIROp/Main.cpp b/benchmarks/AudioProcessing/Operations/IIROp/Main.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/AudioProcessing/Operations/IIROp/Utils.hpp b/benchmarks/AudioProcessing/Operations/IIROp/Utils.hpp old mode 100644 new mode 100755 diff --git a/benchmarks/AudioProcessing/Operations/RFFTOp/CMakeLists.txt b/benchmarks/AudioProcessing/Operations/RFFTOp/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/AudioProcessing/Operations/RFFTOp/GoogleBenchmarkMain.cpp b/benchmarks/AudioProcessing/Operations/RFFTOp/GoogleBenchmarkMain.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/AudioProcessing/Operations/RFFTOp/RFFT.py b/benchmarks/AudioProcessing/Operations/RFFTOp/RFFT.py old mode 100644 new mode 100755 diff --git a/benchmarks/AudioProcessing/README.md b/benchmarks/AudioProcessing/README.md old mode 100644 new mode 100755 diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/CMakeLists.txt b/benchmarks/DeepLearning/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Layers/CMakeLists.txt b/benchmarks/DeepLearning/Layers/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Layers/FFN/.gitignore 
b/benchmarks/DeepLearning/Layers/FFN/.gitignore old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Layers/FFN/CMakeLists.txt b/benchmarks/DeepLearning/Layers/FFN/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Layers/FFN/GoogleBenchmarkMain.cpp b/benchmarks/DeepLearning/Layers/FFN/GoogleBenchmarkMain.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Layers/FFN/buddy_ffn_import.py b/benchmarks/DeepLearning/Layers/FFN/buddy_ffn_import.py old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Layers/RMSNorm/.gitignore b/benchmarks/DeepLearning/Layers/RMSNorm/.gitignore old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Layers/RMSNorm/CMakeLists.txt b/benchmarks/DeepLearning/Layers/RMSNorm/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Layers/RMSNorm/GoogleBenchmarkMain.cpp b/benchmarks/DeepLearning/Layers/RMSNorm/GoogleBenchmarkMain.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Layers/RMSNorm/buddy_rmsnorm_import.py b/benchmarks/DeepLearning/Layers/RMSNorm/buddy_rmsnorm_import.py old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Layers/SelfAttention/.gitignore b/benchmarks/DeepLearning/Layers/SelfAttention/.gitignore old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Layers/SelfAttention/CMakeLists.txt b/benchmarks/DeepLearning/Layers/SelfAttention/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Layers/SelfAttention/GoogleBenchmarkMain.cpp b/benchmarks/DeepLearning/Layers/SelfAttention/GoogleBenchmarkMain.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Layers/SelfAttention/buddy_selfattention_import.py b/benchmarks/DeepLearning/Layers/SelfAttention/buddy_selfattention_import.py old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Models/Bert/.gitignore 
b/benchmarks/DeepLearning/Models/Bert/.gitignore old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Models/Bert/CMakeLists.txt b/benchmarks/DeepLearning/Models/Bert/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Models/Bert/GoogleBenchmarkMain.cpp b/benchmarks/DeepLearning/Models/Bert/GoogleBenchmarkMain.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Models/Bert/buddy_bert_import.py b/benchmarks/DeepLearning/Models/Bert/buddy_bert_import.py old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Models/CMakeLists.txt b/benchmarks/DeepLearning/Models/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Models/LeNet/.gitignore b/benchmarks/DeepLearning/Models/LeNet/.gitignore old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Models/LeNet/CMakeLists.txt b/benchmarks/DeepLearning/Models/LeNet/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Models/LeNet/GoogleBenchmarkMain.cpp b/benchmarks/DeepLearning/Models/LeNet/GoogleBenchmarkMain.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Models/LeNet/buddy_lenet_import.py b/benchmarks/DeepLearning/Models/LeNet/buddy_lenet_import.py old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Models/LeNet/lenet_model.pth b/benchmarks/DeepLearning/Models/LeNet/lenet_model.pth old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Models/LeNet/model.py b/benchmarks/DeepLearning/Models/LeNet/model.py old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Models/MobileNet-V3/.gitignore b/benchmarks/DeepLearning/Models/MobileNet-V3/.gitignore old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Models/MobileNet-V3/CMakeLists.txt b/benchmarks/DeepLearning/Models/MobileNet-V3/CMakeLists.txt old mode 100644 new mode 100755 diff --git 
a/benchmarks/DeepLearning/Models/MobileNet-V3/GoogleBenchmarkMain.cpp b/benchmarks/DeepLearning/Models/MobileNet-V3/GoogleBenchmarkMain.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Models/MobileNet-V3/buddy_mobilenetv3_import.py b/benchmarks/DeepLearning/Models/MobileNet-V3/buddy_mobilenetv3_import.py old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Models/Resnet18/.gitignore b/benchmarks/DeepLearning/Models/Resnet18/.gitignore old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Models/Resnet18/CMakeLists.txt b/benchmarks/DeepLearning/Models/Resnet18/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Models/Resnet18/GoogleBenchmarkMain.cpp b/benchmarks/DeepLearning/Models/Resnet18/GoogleBenchmarkMain.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Models/Resnet18/buddy_resnet18_import.py b/benchmarks/DeepLearning/Models/Resnet18/buddy_resnet18_import.py old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Models/TinyLlama/.gitignore b/benchmarks/DeepLearning/Models/TinyLlama/.gitignore old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Models/TinyLlama/CMakeLists.txt b/benchmarks/DeepLearning/Models/TinyLlama/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Models/TinyLlama/Main.cpp b/benchmarks/DeepLearning/Models/TinyLlama/Main.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Models/TinyLlama/Utils.hpp b/benchmarks/DeepLearning/Models/TinyLlama/Utils.hpp old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Models/TinyLlama/buddy_tinyllama_import.py b/benchmarks/DeepLearning/Models/TinyLlama/buddy_tinyllama_import.py old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Models/Whisper/.gitignore b/benchmarks/DeepLearning/Models/Whisper/.gitignore old mode 100644 new mode 100755 diff --git 
a/benchmarks/DeepLearning/Models/Whisper/CMakeLists.txt b/benchmarks/DeepLearning/Models/Whisper/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Models/Whisper/GoogleBenchmarkMain.cpp b/benchmarks/DeepLearning/Models/Whisper/GoogleBenchmarkMain.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Models/Whisper/buddy_whisper_import.py b/benchmarks/DeepLearning/Models/Whisper/buddy_whisper_import.py old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/ArithAddfOp/ArithAddf.mlir b/benchmarks/DeepLearning/Ops/ArithAddfOp/ArithAddf.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/ArithAddfOp/CMakeLists.txt b/benchmarks/DeepLearning/Ops/ArithAddfOp/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/ArithAddfOp/GoogleBenchmarkMain.cpp b/benchmarks/DeepLearning/Ops/ArithAddfOp/GoogleBenchmarkMain.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/ArithDivfOp/ArithDivf.mlir b/benchmarks/DeepLearning/Ops/ArithDivfOp/ArithDivf.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/ArithDivfOp/CMakeLists.txt b/benchmarks/DeepLearning/Ops/ArithDivfOp/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/ArithDivfOp/GoogleBenchmarkMain.cpp b/benchmarks/DeepLearning/Ops/ArithDivfOp/GoogleBenchmarkMain.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/ArithMulfOp/ArithMulf.mlir b/benchmarks/DeepLearning/Ops/ArithMulfOp/ArithMulf.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/ArithMulfOp/CMakeLists.txt b/benchmarks/DeepLearning/Ops/ArithMulfOp/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/ArithMulfOp/GoogleBenchmarkMain.cpp b/benchmarks/DeepLearning/Ops/ArithMulfOp/GoogleBenchmarkMain.cpp old mode 100644 new mode 100755 diff --git 
a/benchmarks/DeepLearning/Ops/ArithNegfOp/ArithNegf.mlir b/benchmarks/DeepLearning/Ops/ArithNegfOp/ArithNegf.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/ArithNegfOp/CMakeLists.txt b/benchmarks/DeepLearning/Ops/ArithNegfOp/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/ArithNegfOp/GoogleBenchmarkMain.cpp b/benchmarks/DeepLearning/Ops/ArithNegfOp/GoogleBenchmarkMain.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/ArithSubfOp/ArithSubf.mlir b/benchmarks/DeepLearning/Ops/ArithSubfOp/ArithSubf.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/ArithSubfOp/CMakeLists.txt b/benchmarks/DeepLearning/Ops/ArithSubfOp/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/ArithSubfOp/GoogleBenchmarkMain.cpp b/benchmarks/DeepLearning/Ops/ArithSubfOp/GoogleBenchmarkMain.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/BatchMatMulOp/BatchMatMul.mlir b/benchmarks/DeepLearning/Ops/BatchMatMulOp/BatchMatMul.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/BatchMatMulOp/BatchMatMulBroadcast.mlir b/benchmarks/DeepLearning/Ops/BatchMatMulOp/BatchMatMulBroadcast.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/BatchMatMulOp/BatchMatMulSCF.mlir b/benchmarks/DeepLearning/Ops/BatchMatMulOp/BatchMatMulSCF.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/BatchMatMulOp/BatchMatMulVec.mlir b/benchmarks/DeepLearning/Ops/BatchMatMulOp/BatchMatMulVec.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/BatchMatMulOp/BatchMatMulVecTile.mlir b/benchmarks/DeepLearning/Ops/BatchMatMulOp/BatchMatMulVecTile.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/BatchMatMulOp/CMakeLists.txt b/benchmarks/DeepLearning/Ops/BatchMatMulOp/CMakeLists.txt old mode 100644 new mode 100755 
diff --git a/benchmarks/DeepLearning/Ops/BatchMatMulOp/Main.cpp b/benchmarks/DeepLearning/Ops/BatchMatMulOp/Main.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/BatchMatMulOp/Utils.hpp b/benchmarks/DeepLearning/Ops/BatchMatMulOp/Utils.hpp old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/CMakeLists.txt b/benchmarks/DeepLearning/Ops/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/Conv2DNchwFchwOp/CMakeLists.txt b/benchmarks/DeepLearning/Ops/Conv2DNchwFchwOp/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/Conv2DNchwFchwOp/Conv2DNchwFchw.mlir b/benchmarks/DeepLearning/Ops/Conv2DNchwFchwOp/Conv2DNchwFchw.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/Conv2DNchwFchwOp/GoogleBenchmarkMain.cpp b/benchmarks/DeepLearning/Ops/Conv2DNchwFchwOp/GoogleBenchmarkMain.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/Conv2DNchwFchwOp/conv2d-nchw-fchw-im2col.mlir b/benchmarks/DeepLearning/Ops/Conv2DNchwFchwOp/conv2d-nchw-fchw-im2col.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcInt32Op/CMakeLists.txt b/benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcInt32Op/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcInt32Op/Conv2DNhwcFhwc.mlir b/benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcInt32Op/Conv2DNhwcFhwc.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcInt32Op/Conv2DNhwcFhwcVec.mlir b/benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcInt32Op/Conv2DNhwcFhwcVec.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcInt32Op/Conv2DNhwcFhwcVecRVV.mlir b/benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcInt32Op/Conv2DNhwcFhwcVecRVV.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcInt32Op/Main.cpp 
b/benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcInt32Op/Main.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcInt32Op/Utils.hpp b/benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcInt32Op/Utils.hpp old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcOp/CMakeLists.txt b/benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcOp/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcOp/Conv2DNhwcFhwc.mlir b/benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcOp/Conv2DNhwcFhwc.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcOp/Conv2DNhwcFhwcVec.mlir b/benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcOp/Conv2DNhwcFhwcVec.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcOp/Conv2DNhwcFhwcVecTile.mlir b/benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcOp/Conv2DNhwcFhwcVecTile.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcOp/Main.cpp b/benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcOp/Main.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcOp/Utils.hpp b/benchmarks/DeepLearning/Ops/Conv2DNhwcFhwcOp/Utils.hpp old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/Conv2DNhwcHwcfOp/CMakeLists.txt b/benchmarks/DeepLearning/Ops/Conv2DNhwcHwcfOp/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/Conv2DNhwcHwcfOp/Conv2DNhwcHwcf.mlir b/benchmarks/DeepLearning/Ops/Conv2DNhwcHwcfOp/Conv2DNhwcHwcf.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/Conv2DNhwcHwcfOp/GoogleBenchmarkMain.cpp b/benchmarks/DeepLearning/Ops/Conv2DNhwcHwcfOp/GoogleBenchmarkMain.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/DepthwiseConv2DNhwcHwcOp/CMakeLists.txt b/benchmarks/DeepLearning/Ops/DepthwiseConv2DNhwcHwcOp/CMakeLists.txt old mode 100644 new mode 
100755 diff --git a/benchmarks/DeepLearning/Ops/DepthwiseConv2DNhwcHwcOp/DepthwiseConv2DNhwcHwc.mlir b/benchmarks/DeepLearning/Ops/DepthwiseConv2DNhwcHwcOp/DepthwiseConv2DNhwcHwc.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/DepthwiseConv2DNhwcHwcOp/DepthwiseConv2DNhwcHwcVec.mlir b/benchmarks/DeepLearning/Ops/DepthwiseConv2DNhwcHwcOp/DepthwiseConv2DNhwcHwcVec.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/DepthwiseConv2DNhwcHwcOp/Main.cpp b/benchmarks/DeepLearning/Ops/DepthwiseConv2DNhwcHwcOp/Main.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/DepthwiseConv2DNhwcHwcOp/Utils.hpp b/benchmarks/DeepLearning/Ops/DepthwiseConv2DNhwcHwcOp/Utils.hpp old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/MatMulOp/CMakeLists.txt b/benchmarks/DeepLearning/Ops/MatMulOp/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/MatMulOp/Main.cpp b/benchmarks/DeepLearning/Ops/MatMulOp/Main.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/MatMulOp/Utils.hpp b/benchmarks/DeepLearning/Ops/MatMulOp/Utils.hpp old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/MatMulOp/matmul.mlir b/benchmarks/DeepLearning/Ops/MatMulOp/matmul.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/MatMulTransposeBOp/CMakeLists.txt b/benchmarks/DeepLearning/Ops/MatMulTransposeBOp/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/MatMulTransposeBOp/Main.cpp b/benchmarks/DeepLearning/Ops/MatMulTransposeBOp/Main.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/MatMulTransposeBOp/MatMulTransposeB.mlir b/benchmarks/DeepLearning/Ops/MatMulTransposeBOp/MatMulTransposeB.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/MatMulTransposeBOp/Utils.hpp b/benchmarks/DeepLearning/Ops/MatMulTransposeBOp/Utils.hpp old 
mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/MathExpOp/CMakeLists.txt b/benchmarks/DeepLearning/Ops/MathExpOp/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/MathExpOp/GoogleBenchmarkMain.cpp b/benchmarks/DeepLearning/Ops/MathExpOp/GoogleBenchmarkMain.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/MathExpOp/MathExp.mlir b/benchmarks/DeepLearning/Ops/MathExpOp/MathExp.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/MathFpowOp/CMakeLists.txt b/benchmarks/DeepLearning/Ops/MathFpowOp/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/MathFpowOp/GoogleBenchmarkMain.cpp b/benchmarks/DeepLearning/Ops/MathFpowOp/GoogleBenchmarkMain.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/MathFpowOp/MathFpow.mlir b/benchmarks/DeepLearning/Ops/MathFpowOp/MathFpow.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/MathRsqrtOp/CMakeLists.txt b/benchmarks/DeepLearning/Ops/MathRsqrtOp/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/MathRsqrtOp/GoogleBenchmarkMain.cpp b/benchmarks/DeepLearning/Ops/MathRsqrtOp/GoogleBenchmarkMain.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/MathRsqrtOp/MathRsqrt.mlir b/benchmarks/DeepLearning/Ops/MathRsqrtOp/MathRsqrt.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/PoolingNhwcSumOp/CMakeLists.txt b/benchmarks/DeepLearning/Ops/PoolingNhwcSumOp/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/PoolingNhwcSumOp/GoogleBenchmarkMain.cpp b/benchmarks/DeepLearning/Ops/PoolingNhwcSumOp/GoogleBenchmarkMain.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/PoolingNhwcSumOp/PoolingNhwcSum.mlir b/benchmarks/DeepLearning/Ops/PoolingNhwcSumOp/PoolingNhwcSum.mlir old mode 100644 new mode 100755 
diff --git a/benchmarks/DeepLearning/Ops/ReduceAddfOp/CMakeLists.txt b/benchmarks/DeepLearning/Ops/ReduceAddfOp/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/ReduceAddfOp/GoogleBenchmarkMain.cpp b/benchmarks/DeepLearning/Ops/ReduceAddfOp/GoogleBenchmarkMain.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/ReduceAddfOp/ReduceAddf.mlir b/benchmarks/DeepLearning/Ops/ReduceAddfOp/ReduceAddf.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/ReduceMaxfOp/CMakeLists.txt b/benchmarks/DeepLearning/Ops/ReduceMaxfOp/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/ReduceMaxfOp/GoogleBenchmarkMain.cpp b/benchmarks/DeepLearning/Ops/ReduceMaxfOp/GoogleBenchmarkMain.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/ReduceMaxfOp/ReduceMaxf.mlir b/benchmarks/DeepLearning/Ops/ReduceMaxfOp/ReduceMaxf.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/SoftmaxExpSumDivOp/CMakeLists.txt b/benchmarks/DeepLearning/Ops/SoftmaxExpSumDivOp/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/SoftmaxExpSumDivOp/GoogleBenchmarkMain.cpp b/benchmarks/DeepLearning/Ops/SoftmaxExpSumDivOp/GoogleBenchmarkMain.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/SoftmaxExpSumDivOp/SoftmaxExpSumDiv.mlir b/benchmarks/DeepLearning/Ops/SoftmaxExpSumDivOp/SoftmaxExpSumDiv.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/TransposeOp/CMakeLists.txt b/benchmarks/DeepLearning/Ops/TransposeOp/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/TransposeOp/Main.cpp b/benchmarks/DeepLearning/Ops/TransposeOp/Main.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/Ops/TransposeOp/Transpose2D.mlir b/benchmarks/DeepLearning/Ops/TransposeOp/Transpose2D.mlir old mode 100644 new mode 
100755 diff --git a/benchmarks/DeepLearning/Ops/TransposeOp/Utils.hpp b/benchmarks/DeepLearning/Ops/TransposeOp/Utils.hpp old mode 100644 new mode 100755 diff --git a/benchmarks/DeepLearning/README.md b/benchmarks/DeepLearning/README.md old mode 100644 new mode 100755 diff --git a/benchmarks/Gemmini/CMakeLists.txt b/benchmarks/Gemmini/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/Gemmini/Ops/CMakeLists.txt b/benchmarks/Gemmini/Ops/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/Gemmini/Ops/MatMulOp/CMakeLists.txt b/benchmarks/Gemmini/Ops/MatMulOp/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c b/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c old mode 100644 new mode 100755 diff --git a/benchmarks/Gemmini/Ops/MatMulOp/ExoUtils.h b/benchmarks/Gemmini/Ops/MatMulOp/ExoUtils.h old mode 100644 new mode 100755 diff --git a/benchmarks/Gemmini/Ops/MatMulOp/Main.cpp b/benchmarks/Gemmini/Ops/MatMulOp/Main.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Gemmini/Ops/MatMulOp/matmul.mlir b/benchmarks/Gemmini/Ops/MatMulOp/matmul.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Gemmini/README.md b/benchmarks/Gemmini/README.md old mode 100644 new mode 100755 diff --git a/benchmarks/Gemmini/ResNet-101/.gitattributes b/benchmarks/Gemmini/ResNet-101/.gitattributes old mode 100644 new mode 100755 diff --git a/benchmarks/Gemmini/ResNet-101/CMakeLists.txt b/benchmarks/Gemmini/ResNet-101/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/Gemmini/ResNet-101/CRunnerUtils.cpp b/benchmarks/Gemmini/ResNet-101/CRunnerUtils.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Gemmini/ResNet-101/Main.cpp b/benchmarks/Gemmini/ResNet-101/Main.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Gemmini/ResNet-101/ResNet101.mlir b/benchmarks/Gemmini/ResNet-101/ResNet101.mlir old mode 100644 new mode 100755 diff --git 
a/benchmarks/Gemmini/ResNet-101/images/Cat.h b/benchmarks/Gemmini/ResNet-101/images/Cat.h old mode 100644 new mode 100755 diff --git a/benchmarks/Gemmini/ResNet-101/images/Cat.jpg b/benchmarks/Gemmini/ResNet-101/images/Cat.jpg old mode 100644 new mode 100755 diff --git a/benchmarks/Gemmini/ResNet-101/include/Labels.h b/benchmarks/Gemmini/ResNet-101/include/Labels.h old mode 100644 new mode 100755 diff --git a/benchmarks/Gemmini/Utils.h b/benchmarks/Gemmini/Utils.h old mode 100644 new mode 100755 diff --git a/benchmarks/ImageProcessing/BuddyConv2DBenchmark.cpp b/benchmarks/ImageProcessing/BuddyConv2DBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/ImageProcessing/BuddyCorr2DBenchmark.cpp b/benchmarks/ImageProcessing/BuddyCorr2DBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/ImageProcessing/BuddyMorph2DBenchmark.cpp b/benchmarks/ImageProcessing/BuddyMorph2DBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/ImageProcessing/BuddyResize2DBenchmark.cpp b/benchmarks/ImageProcessing/BuddyResize2DBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/ImageProcessing/CMakeLists.txt b/benchmarks/ImageProcessing/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/ImageProcessing/EigenConvolve2DBenchmark.cpp b/benchmarks/ImageProcessing/EigenConvolve2DBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/ImageProcessing/Images/YuTu.png b/benchmarks/ImageProcessing/Images/YuTu.png old mode 100644 new mode 100755 diff --git a/benchmarks/ImageProcessing/Images/YuTu1022.png b/benchmarks/ImageProcessing/Images/YuTu1022.png old mode 100644 new mode 100755 diff --git a/benchmarks/ImageProcessing/Images/YuTu1024.png b/benchmarks/ImageProcessing/Images/YuTu1024.png old mode 100644 new mode 100755 diff --git a/benchmarks/ImageProcessing/Images/YuTu128.png b/benchmarks/ImageProcessing/Images/YuTu128.png old mode 100644 new mode 100755 diff --git 
a/benchmarks/ImageProcessing/Images/YuTu16.png b/benchmarks/ImageProcessing/Images/YuTu16.png old mode 100644 new mode 100755 diff --git a/benchmarks/ImageProcessing/Images/YuTu18.png b/benchmarks/ImageProcessing/Images/YuTu18.png old mode 100644 new mode 100755 diff --git a/benchmarks/ImageProcessing/Images/YuTu256.png b/benchmarks/ImageProcessing/Images/YuTu256.png old mode 100644 new mode 100755 diff --git a/benchmarks/ImageProcessing/Images/YuTu32.png b/benchmarks/ImageProcessing/Images/YuTu32.png old mode 100644 new mode 100755 diff --git a/benchmarks/ImageProcessing/Images/YuTu4.png b/benchmarks/ImageProcessing/Images/YuTu4.png old mode 100644 new mode 100755 diff --git a/benchmarks/ImageProcessing/Images/YuTu512.png b/benchmarks/ImageProcessing/Images/YuTu512.png old mode 100644 new mode 100755 diff --git a/benchmarks/ImageProcessing/Images/YuTu6.png b/benchmarks/ImageProcessing/Images/YuTu6.png old mode 100644 new mode 100755 diff --git a/benchmarks/ImageProcessing/Images/YuTu64.png b/benchmarks/ImageProcessing/Images/YuTu64.png old mode 100644 new mode 100755 diff --git a/benchmarks/ImageProcessing/Images/YuTu8.png b/benchmarks/ImageProcessing/Images/YuTu8.png old mode 100644 new mode 100755 diff --git a/benchmarks/ImageProcessing/MLIRConv2D.mlir b/benchmarks/ImageProcessing/MLIRConv2D.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/ImageProcessing/MLIRConv2DBenchmark.cpp b/benchmarks/ImageProcessing/MLIRConv2DBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/ImageProcessing/Main.cpp b/benchmarks/ImageProcessing/Main.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/ImageProcessing/OpenCVFilter2DBenchmark.cpp b/benchmarks/ImageProcessing/OpenCVFilter2DBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/ImageProcessing/OpenCVMorph2DBenchmark.cpp b/benchmarks/ImageProcessing/OpenCVMorph2DBenchmark.cpp old mode 100644 new mode 100755 diff --git 
a/benchmarks/ImageProcessing/OpenCVResize2DBenchmark.cpp b/benchmarks/ImageProcessing/OpenCVResize2DBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/ImageProcessing/include/Kernels.h b/benchmarks/ImageProcessing/include/Kernels.h old mode 100644 new mode 100755 diff --git a/benchmarks/OpOptimization/CMakeLists.txt b/benchmarks/OpOptimization/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/OpOptimization/Conv2dNchwFchw/CMakeLists.txt b/benchmarks/OpOptimization/Conv2dNchwFchw/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/OpOptimization/Conv2dNchwFchw/Conv2DNchwFchw.mlir b/benchmarks/OpOptimization/Conv2dNchwFchw/Conv2DNchwFchw.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/OpOptimization/Conv2dNchwFchw/Conv2DNchwFchwBenchmark.cpp b/benchmarks/OpOptimization/Conv2dNchwFchw/Conv2DNchwFchwBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/OpOptimization/Conv2dNchwFchw/Conv2DNchwFchwBroadcast.mlir b/benchmarks/OpOptimization/Conv2dNchwFchw/Conv2DNchwFchwBroadcast.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/OpOptimization/Conv2dNchwFchw/Conv2DNchwFchwIm2col.mlir b/benchmarks/OpOptimization/Conv2dNchwFchw/Conv2DNchwFchwIm2col.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/OpOptimization/Conv2dNchwFchw/Conv2DNchwFchwWinagrad.mlir b/benchmarks/OpOptimization/Conv2dNchwFchw/Conv2DNchwFchwWinagrad.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/OpOptimization/Conv2dNchwFchw/Main.cpp b/benchmarks/OpOptimization/Conv2dNchwFchw/Main.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/OpOptimization/MatMul/CMakeLists.txt b/benchmarks/OpOptimization/MatMul/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/OpOptimization/MatMul/Main.cpp b/benchmarks/OpOptimization/MatMul/Main.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/OpOptimization/MatMul/MatMul.mlir 
b/benchmarks/OpOptimization/MatMul/MatMul.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/OpOptimization/MatMul/MatMulBenchmark.cpp b/benchmarks/OpOptimization/MatMul/MatMulBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/OpOptimization/MatMul/MatMulBroadcast.mlir b/benchmarks/OpOptimization/MatMul/MatMulBroadcast.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/OpOptimization/MatMul/MatMulTransform.mlir b/benchmarks/OpOptimization/MatMul/MatMulTransform.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/OpOptimization/MatMul/TVM/.gitignore b/benchmarks/OpOptimization/MatMul/TVM/.gitignore old mode 100644 new mode 100755 diff --git a/benchmarks/OpOptimization/MatMul/TVM/main.py b/benchmarks/OpOptimization/MatMul/TVM/main.py old mode 100644 new mode 100755 diff --git a/benchmarks/OpOptimization/MatMul/TVM/matmul_autotvm.py b/benchmarks/OpOptimization/MatMul/TVM/matmul_autotvm.py old mode 100644 new mode 100755 diff --git a/benchmarks/OpOptimization/MatMul/TVM/matmul_manual.py b/benchmarks/OpOptimization/MatMul/TVM/matmul_manual.py old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/CMakeLists.txt b/benchmarks/Vectorization/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/MLIRMatMul.mlir b/benchmarks/Vectorization/MLIRMatMul.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/MLIRMatMulBenchmark.cpp b/benchmarks/Vectorization/MLIRMatMulBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/MLIRMatVec.mlir b/benchmarks/Vectorization/MLIRMatVec.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/MLIRMatVecBenchmark.cpp b/benchmarks/Vectorization/MLIRMatVecBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/Main.cpp b/benchmarks/Vectorization/Main.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/CMakeLists.txt 
b/benchmarks/Vectorization/gccloops/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx1.mlir b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx1.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx10a.mlir b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx10a.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx10aBenchmark.cpp b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx10aBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx10b.mlir b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx10b.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx10bBenchmark.cpp b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx10bBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx11.mlir b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx11.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx11Benchmark.cpp b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx11Benchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx12.mlir b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx12.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx12Benchmark.cpp b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx12Benchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx13.mlir b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx13.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx13Benchmark.cpp b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx13Benchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx14.mlir 
b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx14.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx14Benchmark.cpp b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx14Benchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx1Benchmark.cpp b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx1Benchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx21.mlir b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx21.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx21Benchmark.cpp b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx21Benchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx23.mlir b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx23.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx23Benchmark.cpp b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx23Benchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx24.mlir b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx24.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx24Benchmark.cpp b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx24Benchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx25.mlir b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx25.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx25Benchmark.cpp b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx25Benchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx2a.mlir b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx2a.mlir old mode 100644 new mode 100755 diff --git 
a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx2aBenchmark.cpp b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx2aBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx2b.mlir b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx2b.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx2bBenchmark.cpp b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx2bBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx3.mlir b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx3.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx3Benchmark.cpp b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx3Benchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx4a.mlir b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx4a.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx4aBenchmark.cpp b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx4aBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx4b.mlir b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx4b.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx4bBenchmark.cpp b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx4bBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx4c.mlir b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx4c.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx4cBenchmark.cpp b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx4cBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx7.mlir b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx7.mlir old mode 100644 new mode 100755 diff --git 
a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx7Benchmark.cpp b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx7Benchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx8.mlir b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx8.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx8Benchmark.cpp b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx8Benchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx9.mlir b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx9.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx9Benchmark.cpp b/benchmarks/Vectorization/gccloops/MLIRGccLoopsEx9Benchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/gccloops/Main.cpp b/benchmarks/Vectorization/gccloops/Main.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/linpackc/CMakeLists.txt b/benchmarks/Vectorization/linpackc/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/linpackc/MLIRLinpackCDaxpyBenchmark.cpp b/benchmarks/Vectorization/linpackc/MLIRLinpackCDaxpyBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/linpackc/MLIRLinpackCDaxpyRollF32.mlir b/benchmarks/Vectorization/linpackc/MLIRLinpackCDaxpyRollF32.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/linpackc/MLIRLinpackCDaxpyRollF64.mlir b/benchmarks/Vectorization/linpackc/MLIRLinpackCDaxpyRollF64.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/linpackc/MLIRLinpackCDaxpyUnrollF32.mlir b/benchmarks/Vectorization/linpackc/MLIRLinpackCDaxpyUnrollF32.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/linpackc/MLIRLinpackCDaxpyUnrollF64.mlir b/benchmarks/Vectorization/linpackc/MLIRLinpackCDaxpyUnrollF64.mlir old mode 100644 new mode 100755 diff --git 
a/benchmarks/Vectorization/linpackc/Main.cpp b/benchmarks/Vectorization/linpackc/Main.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/CMakeLists.txt b/benchmarks/Vectorization/polybench/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybench2mm.mlir b/benchmarks/Vectorization/polybench/MLIRPolybench2mm.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybench2mmBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybench2mmBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybench3mm.mlir b/benchmarks/Vectorization/polybench/MLIRPolybench3mm.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybench3mmBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybench3mmBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchAdi.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchAdi.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchAdiBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchAdiBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchAtax.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchAtax.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchAtaxBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchAtaxBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchBicg.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchBicg.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchBicgBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchBicgBenchmark.cpp old mode 100644 new mode 100755 diff --git 
a/benchmarks/Vectorization/polybench/MLIRPolybenchCholesky.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchCholesky.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchCholeskyBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchCholeskyBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchCorrelation.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchCorrelation.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchCorrelationBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchCorrelationBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchCovariance.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchCovariance.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchCovarianceBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchCovarianceBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchDeriche.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchDeriche.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchDericheBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchDericheBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchDoitgen.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchDoitgen.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchDoitgenBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchDoitgenBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchDurbin.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchDurbin.mlir old mode 100644 new mode 100755 diff --git 
a/benchmarks/Vectorization/polybench/MLIRPolybenchDurbinBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchDurbinBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchFdtd2D.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchFdtd2D.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchFdtd2DBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchFdtd2DBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchFloydWarshall.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchFloydWarshall.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchFloydWarshallBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchFloydWarshallBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchGemm.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchGemm.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchGemmBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchGemmBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchGemver.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchGemver.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchGemverBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchGemverBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchGesummv.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchGesummv.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchGesummvBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchGesummvBenchmark.cpp old mode 100644 new mode 100755 diff --git 
a/benchmarks/Vectorization/polybench/MLIRPolybenchGramschmidt.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchGramschmidt.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchGramschmidtBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchGramschmidtBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchHeat3D.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchHeat3D.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchHeat3DBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchHeat3DBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchJacobi1D.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchJacobi1D.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchJacobi1DBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchJacobi1DBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchJacobi2D.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchJacobi2D.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchJacobi2DBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchJacobi2DBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchLu.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchLu.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchLuBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchLuBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchLudcmp.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchLudcmp.mlir old mode 100644 new mode 100755 diff --git 
a/benchmarks/Vectorization/polybench/MLIRPolybenchLudcmpBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchLudcmpBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchMvt.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchMvt.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchMvtBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchMvtBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchNussinov.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchNussinov.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchNussinovBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchNussinovBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchSeidel2D.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchSeidel2D.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchSeidel2DBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchSeidel2DBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchSymm.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchSymm.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchSymmBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchSymmBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchSyr2k.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchSyr2k.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchSyr2kBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchSyr2kBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchSyrk.mlir 
b/benchmarks/Vectorization/polybench/MLIRPolybenchSyrk.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchSyrkBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchSyrkBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchTrisolv.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchTrisolv.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchTrisolvBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchTrisolvBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchTrmm.mlir b/benchmarks/Vectorization/polybench/MLIRPolybenchTrmm.mlir old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/MLIRPolybenchTrmmBenchmark.cpp b/benchmarks/Vectorization/polybench/MLIRPolybenchTrmmBenchmark.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/Main.cpp b/benchmarks/Vectorization/polybench/Main.cpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/README.md b/benchmarks/Vectorization/polybench/README.md old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/Utils.hpp b/benchmarks/Vectorization/polybench/Utils.hpp old mode 100644 new mode 100755 diff --git a/benchmarks/Vectorization/polybench/polybench_mlir_gen.py b/benchmarks/Vectorization/polybench/polybench_mlir_gen.py old mode 100644 new mode 100755 diff --git a/cmake/buddy-benchmark.cmake b/cmake/buddy-benchmark.cmake old mode 100644 new mode 100755 diff --git a/cmake/check-simd.cmake b/cmake/check-simd.cmake old mode 100644 new mode 100755 diff --git a/docs/ConvAlgorithms.md b/docs/ConvAlgorithms.md old mode 100644 new mode 100755 diff --git a/docs/DeepLearningBenchmark.md b/docs/DeepLearningBenchmark.md old mode 100644 new mode 100755 diff --git a/docs/GemminiConfig.md b/docs/GemminiConfig.md old 
mode 100644 new mode 100755 diff --git a/docs/Images/CoefficientsBroadcasting.png b/docs/Images/CoefficientsBroadcasting.png old mode 100644 new mode 100755 diff --git a/docs/PrepareRVOpenMP.md b/docs/PrepareRVOpenMP.md old mode 100644 new mode 100755 diff --git a/requirements.txt b/requirements.txt old mode 100644 new mode 100755 diff --git a/scripts/logs2html.py b/scripts/logs2html.py old mode 100644 new mode 100755 diff --git a/scripts/run_docker.sh b/scripts/run_docker.sh old mode 100644 new mode 100755 diff --git a/test_result/deeplearning/build_results_crosscompile_summary.log b/test_result/deeplearning/build_results_crosscompile_summary.log old mode 100644 new mode 100755 diff --git a/test_result/deeplearning/build_results_summary.log b/test_result/deeplearning/build_results_summary.log old mode 100644 new mode 100755 diff --git a/test_result/deeplearning/dl-layer-ffn-benchmark.log b/test_result/deeplearning/dl-layer-ffn-benchmark.log old mode 100644 new mode 100755 diff --git a/test_result/deeplearning/dl-layer-rmsnorm-benchmark.log b/test_result/deeplearning/dl-layer-rmsnorm-benchmark.log old mode 100644 new mode 100755 diff --git a/test_result/deeplearning/dl-layer-selfattention-benchmark.log b/test_result/deeplearning/dl-layer-selfattention-benchmark.log old mode 100644 new mode 100755 diff --git a/test_result/deeplearning/dl-model-lenet-benchmark.log b/test_result/deeplearning/dl-model-lenet-benchmark.log old mode 100644 new mode 100755 diff --git a/test_result/deeplearning/dl-model-mobilenetv3-benchmark.log b/test_result/deeplearning/dl-model-mobilenetv3-benchmark.log old mode 100644 new mode 100755 diff --git a/test_result/deeplearning/dl-model-resnet18-benchmark.log b/test_result/deeplearning/dl-model-resnet18-benchmark.log old mode 100644 new mode 100755 diff --git a/test_result/deeplearning/dl-model-tinyllama-benchmark.log b/test_result/deeplearning/dl-model-tinyllama-benchmark.log old mode 100644 new mode 100755 diff --git 
a/test_result/deeplearning/dl-model-whisper-benchmark.log b/test_result/deeplearning/dl-model-whisper-benchmark.log old mode 100644 new mode 100755 diff --git a/test_result/deeplearning/dl-op-linalg-arithaddf-benchmark.log b/test_result/deeplearning/dl-op-linalg-arithaddf-benchmark.log old mode 100644 new mode 100755 diff --git a/test_result/deeplearning/dl-op-linalg-arithdivf-benchmark.log b/test_result/deeplearning/dl-op-linalg-arithdivf-benchmark.log old mode 100644 new mode 100755 diff --git a/test_result/deeplearning/dl-op-linalg-arithmulf-benchmark.log b/test_result/deeplearning/dl-op-linalg-arithmulf-benchmark.log old mode 100644 new mode 100755 diff --git a/test_result/deeplearning/dl-op-linalg-arithnegf-benchmark.log b/test_result/deeplearning/dl-op-linalg-arithnegf-benchmark.log old mode 100644 new mode 100755 diff --git a/test_result/deeplearning/dl-op-linalg-arithsubf-benchmark.log b/test_result/deeplearning/dl-op-linalg-arithsubf-benchmark.log old mode 100644 new mode 100755 diff --git a/test_result/deeplearning/dl-op-linalg-batch-matmul-benchmark.log b/test_result/deeplearning/dl-op-linalg-batch-matmul-benchmark.log old mode 100644 new mode 100755 diff --git a/test_result/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.log b/test_result/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.log old mode 100644 new mode 100755 diff --git a/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.log b/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.log old mode 100644 new mode 100755 diff --git a/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.log b/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.log old mode 100644 new mode 100755 diff --git a/test_result/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.log b/test_result/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.log old mode 100644 new mode 100755 diff --git 
a/test_result/deeplearning/dl-op-linalg-mathexp-benchmark.log b/test_result/deeplearning/dl-op-linalg-mathexp-benchmark.log old mode 100644 new mode 100755 diff --git a/test_result/deeplearning/dl-op-linalg-mathfpow-benchmark.log b/test_result/deeplearning/dl-op-linalg-mathfpow-benchmark.log old mode 100644 new mode 100755 diff --git a/test_result/deeplearning/dl-op-linalg-mathrsqrt-benchmark.log b/test_result/deeplearning/dl-op-linalg-mathrsqrt-benchmark.log old mode 100644 new mode 100755 diff --git a/test_result/deeplearning/dl-op-linalg-matmul-benchmark.log b/test_result/deeplearning/dl-op-linalg-matmul-benchmark.log old mode 100644 new mode 100755 diff --git a/test_result/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.log b/test_result/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.log old mode 100644 new mode 100755 diff --git a/test_result/deeplearning/dl-op-linalg-reduceaddf-benchmark.log b/test_result/deeplearning/dl-op-linalg-reduceaddf-benchmark.log old mode 100644 new mode 100755 diff --git a/test_result/deeplearning/dl-op-linalg-reducemaxf-benchmark.log b/test_result/deeplearning/dl-op-linalg-reducemaxf-benchmark.log old mode 100644 new mode 100755 diff --git a/test_result/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.log b/test_result/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.log old mode 100644 new mode 100755 diff --git a/test_result/deeplearning/dl-op-matmul-transpose-b-benchmark.log b/test_result/deeplearning/dl-op-matmul-transpose-b-benchmark.log old mode 100644 new mode 100755 diff --git a/test_result/deeplearning/dl-op-tosa-transpose-benchmark.log b/test_result/deeplearning/dl-op-tosa-transpose-benchmark.log old mode 100644 new mode 100755 diff --git a/test_result/deeplearning/run_results_summary.log b/test_result/deeplearning/run_results_summary.log old mode 100644 new mode 100755 diff --git a/test_result/geminiprocessing/build.log b/test_result/geminiprocessing/build.log old mode 100644 new mode 100755 diff 
--git a/test_result/geminiprocessing/cmake_configure.log b/test_result/geminiprocessing/cmake_configure.log old mode 100644 new mode 100755 diff --git a/test_result/vectorization/vectorization_result.log b/test_result/vectorization/vectorization_result.log old mode 100644 new mode 100755 diff --git a/thirdparty/README.md b/thirdparty/README.md old mode 100644 new mode 100755 diff --git a/utils/CMakeLists.txt b/utils/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/utils/plots/CMakeLists.txt b/utils/plots/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/utils/plots/Main.cpp b/utils/plots/Main.cpp old mode 100644 new mode 100755 diff --git a/utils/plots/python/plot.py b/utils/plots/python/plot.py old mode 100644 new mode 100755 diff --git a/utils/plots/python/plotools/.gitignore b/utils/plots/python/plotools/.gitignore old mode 100644 new mode 100755 diff --git a/utils/plots/python/plotools/__init__.py b/utils/plots/python/plotools/__init__.py old mode 100644 new mode 100755 diff --git a/utils/plots/python/plotools/compare.py b/utils/plots/python/plotools/compare.py old mode 100644 new mode 100755 diff --git a/utils/plots/source_dir.h.in b/utils/plots/source_dir.h.in old mode 100644 new mode 100755 diff --git a/validation/AudioProcessing/AudioValidationLib.cpp b/validation/AudioProcessing/AudioValidationLib.cpp old mode 100644 new mode 100755 diff --git a/validation/AudioProcessing/CMakeLists.txt b/validation/AudioProcessing/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/validation/CMakeLists.txt b/validation/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/validation/Python/.gitignore b/validation/Python/.gitignore old mode 100644 new mode 100755 diff --git a/validation/Python/__init__.py b/validation/Python/__init__.py old mode 100644 new mode 100755 diff --git a/validation/Python/audio/__init__.py b/validation/Python/audio/__init__.py old mode 100644 new mode 100755 diff --git a/validation/Python/audio/audio_file.py 
b/validation/Python/audio/audio_file.py old mode 100644 new mode 100755 diff --git a/validation/Python/audio/audio_test.py b/validation/Python/audio/audio_test.py old mode 100644 new mode 100755 diff --git a/validation/Python/audio/fir.py b/validation/Python/audio/fir.py old mode 100644 new mode 100755 diff --git a/validation/Python/main.py b/validation/Python/main.py old mode 100644 new mode 100755 diff --git a/validation/Python/requirements.txt b/validation/Python/requirements.txt old mode 100644 new mode 100755 diff --git a/validation/Python/utils/__init__.py b/validation/Python/utils/__init__.py old mode 100644 new mode 100755 diff --git a/validation/Python/utils/audio_format.py b/validation/Python/utils/audio_format.py old mode 100644 new mode 100755 diff --git a/validation/Python/utils/lib_path.py b/validation/Python/utils/lib_path.py old mode 100644 new mode 100755 diff --git a/validation/README.md b/validation/README.md old mode 100644 new mode 100755 From ca204be97d6b780b4d366ab221e3591fdd5dbc75 Mon Sep 17 00:00:00 2001 From: Q Liu <52538137+LIUQyou@users.noreply.github.com> Date: Sun, 25 May 2025 20:30:38 +0200 Subject: [PATCH 06/52] test --- .github/workflows/bench.yml | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml index 29e2b5ee..580ede14 100755 --- a/.github/workflows/bench.yml +++ b/.github/workflows/bench.yml @@ -17,22 +17,21 @@ jobs: # ------------------------------------------------------------ # 1) check out the two source trees side-by-side # ------------------------------------------------------------ - - uses: actions/checkout@v4 - with: {path: buddy-benchmark} - - - uses: actions/checkout@v4 - with: - repository: BuddyCompiler/buddy-mlir # adjust if fork - path: buddy-mlir + - name: Update local clones + run: | + set -e + cd /home/quliu/buddy-complier-workspace/buddy-benchmark + git pull --ff-only # 
------------------------------------------------------------ # 2) run everything in the Docker sandbox # ------------------------------------------------------------ - - name: Build & run benchmarks in container - working-directory: buddy-benchmark + - name: Build & run benchmarks run: | - chmod +x scripts/run_docker.sh - scripts/run_docker.sh + cd /home/quliu/buddy-complier-workspace/ + chmod +x /home/quliu/buddy-complier-workspace/buddy-benchmark/scripts/run_docker.sh + /home/quliu/buddy-complier-workspace/buddy-benchmark/scripts/run_docker.sh + # after the script we have ./test_result in the workspace --------- From 3a4f74c245a15f5bdd734fb5c919c099fcc23d4c Mon Sep 17 00:00:00 2001 From: Q Liu <52538137+LIUQyou@users.noreply.github.com> Date: Sun, 25 May 2025 20:39:10 +0200 Subject: [PATCH 07/52] ci: run benchmarks inside reusable docker container --- .github/workflows/bench.yml | 38 ++++++++++++++++--------------------- 1 file changed, 16 insertions(+), 22 deletions(-) diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml index 580ede14..b4546327 100755 --- a/.github/workflows/bench.yml +++ b/.github/workflows/bench.yml @@ -1,65 +1,59 @@ +# .github/workflows/bench.yml name: Buddy-Benchmark CI on: - push: - branches: ['**'] # or whatever branch should trigger the run + push: # fire on any branch + branches: ['**'] pull_request: jobs: bench: - runs-on: self-hosted # your own actions-runner machine + runs-on: self-hosted permissions: contents: read pages: write - id-token: write # required by Pages deploy + id-token: write steps: # ------------------------------------------------------------ - # 1) check out the two source trees side-by-side + # 1) update the two local clones so they match the commit that + # triggered the run (fast-forward only for safety) # ------------------------------------------------------------ - name: Update local clones run: | set -e - cd /home/quliu/buddy-complier-workspace/buddy-benchmark - git pull --ff-only + for dir 
in buddy-benchmark buddy-mlir; do + cd /home/quliu/buddy-complier-workspace/$dir + git remote update + git pull --ff-only + done # ------------------------------------------------------------ # 2) run everything in the Docker sandbox # ------------------------------------------------------------ - name: Build & run benchmarks run: | - cd /home/quliu/buddy-complier-workspace/ - chmod +x /home/quliu/buddy-complier-workspace/buddy-benchmark/scripts/run_docker.sh /home/quliu/buddy-complier-workspace/buddy-benchmark/scripts/run_docker.sh - - # after the script we have ./test_result in the workspace --------- - # ------------------------------------------------------------ - # 3) keep a raw download for debugging + # 3-5) (UNCHANGED) upload raw logs, convert to HTML, deploy Pages # ------------------------------------------------------------ - name: Upload raw logs as artifact uses: actions/upload-artifact@v4 with: name: vectorization-logs-${{ github.sha }} - path: buddy-benchmark/test_result + path: /home/quliu/buddy-complier-workspace/buddy-benchmark/test_result retention-days: 30 - # ------------------------------------------------------------ - # 4) convert *.log → HTML - # ------------------------------------------------------------ - name: Build mini-site - working-directory: buddy-benchmark + working-directory: /home/quliu/buddy-complier-workspace/buddy-benchmark run: | python scripts/logs2html.py test_result site - # ------------------------------------------------------------ - # 5) publish the site to GitHub Pages - # ------------------------------------------------------------ - name: Upload site artifact uses: actions/upload-pages-artifact@v3 with: - path: buddy-benchmark/site + path: /home/quliu/buddy-complier-workspace/buddy-benchmark/site - name: Deploy to Pages id: deploy From 2cae659900a6eb4a18ebeceef8432c3d79b22205 Mon Sep 17 00:00:00 2001 From: Q Liu <52538137+LIUQyou@users.noreply.github.com> Date: Sun, 25 May 2025 20:40:53 +0200 Subject: [PATCH 
08/52] ci: run benchmarks inside reusable docker container --- .github/workflows/bench.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml index b4546327..b15cee4d 100755 --- a/.github/workflows/bench.yml +++ b/.github/workflows/bench.yml @@ -33,7 +33,7 @@ jobs: # ------------------------------------------------------------ - name: Build & run benchmarks run: | - /home/quliu/buddy-complier-workspace/buddy-benchmark/scripts/run_docker.sh + /home/quliu/buddy-complier-workspace/run_docker.sh # ------------------------------------------------------------ # 3-5) (UNCHANGED) upload raw logs, convert to HTML, deploy Pages From c54f5d33d49db967f509d745e690bbd62eac2e2e Mon Sep 17 00:00:00 2001 From: Q Liu <52538137+LIUQyou@users.noreply.github.com> Date: Sun, 25 May 2025 21:10:27 +0200 Subject: [PATCH 09/52] ci: run benchmarks inside reusable docker container --- .github/workflows/bench.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml index b15cee4d..cbb17342 100755 --- a/.github/workflows/bench.yml +++ b/.github/workflows/bench.yml @@ -61,3 +61,4 @@ jobs: outputs: page_url: ${{ steps.deploy.outputs.page_url }} + From 505884b84c86e98486500bb723910a0a07d1b28c Mon Sep 17 00:00:00 2001 From: Q Liu <52538137+LIUQyou@users.noreply.github.com> Date: Sun, 25 May 2025 21:53:22 +0200 Subject: [PATCH 10/52] ci: run benchmarks inside reusable docker container --- .../vectorization/vectorization_result.log | 6 +++ test/test_script_vectorizationprocessing.sh | 15 ++++--- .../vectorization/vectorization_result.log | 39 ++++++++++--------- 3 files changed, 35 insertions(+), 25 deletions(-) create mode 100644 test/test_result/vectorization/vectorization_result.log diff --git a/test/test_result/vectorization/vectorization_result.log b/test/test_result/vectorization/vectorization_result.log new file mode 100644 index 00000000..b0ff4d66 --- /dev/null +++ 
b/test/test_result/vectorization/vectorization_result.log @@ -0,0 +1,6 @@ +Vectorization Benchmark - Sun May 25 19:22:50 UTC 2025 +[Info] Running CMake configuration... +CMake Error: The source directory "/home/buddy-complier-workspace/buddy-benchmark/test" does not appear to contain CMakeLists.txt. +Specify --help for usage, or press the help button on the CMake GUI. +[Info] Building vectorization-matrix-benchmark... +ninja: error: loading 'build.ninja': No such file or directory diff --git a/test/test_script_vectorizationprocessing.sh b/test/test_script_vectorizationprocessing.sh index 4fccc193..5a3e8584 100755 --- a/test/test_script_vectorizationprocessing.sh +++ b/test/test_script_vectorizationprocessing.sh @@ -1,11 +1,11 @@ #!/usr/bin/env bash -apt update -apt install -y libc6-riscv64-cross -apt install -y \ - libc6-riscv64-cross \ - libstdc++6-riscv64-cross \ - libgcc-s1-riscv64-cross +# apt update +# apt install -y libc6-riscv64-cross +# apt install -y \ +# libc6-riscv64-cross \ +# libstdc++6-riscv64-cross \ +# libgcc-s1-riscv64-cross ################################################################################ # 1. Script Setup ################################################################################ @@ -30,6 +30,9 @@ echo "Vectorization Benchmark - $(date)" > "${LOG_FILE}" ################################################################################ # 2. Build Benchmark ################################################################################ +cd /home/buddy-complier-workspace/buddy-benchmark +echo "[Info] Starting vectorization-matrix-benchmark build..." | tee -a "${LOG_FILE}" +rm -rf build mkdir -p build && cd build echo "[Info] Running CMake configuration..." | tee -a "${LOG_FILE}" cmake -G Ninja .. 
\ diff --git a/test_result/vectorization/vectorization_result.log b/test_result/vectorization/vectorization_result.log index a5ec2b95..e7431d95 100755 --- a/test_result/vectorization/vectorization_result.log +++ b/test_result/vectorization/vectorization_result.log @@ -1,4 +1,5 @@ -Vectorization Benchmark - Sun May 25 16:08:49 UTC 2025 +Vectorization Benchmark - Sun May 25 19:53:00 UTC 2025 +[Info] Starting vectorization-matrix-benchmark build... [Info] Running CMake configuration... -- The CXX compiler identification is GNU 11.4.0 -- The C compiler identification is GNU 11.4.0 @@ -129,21 +130,21 @@ Call Stack (most recent call first): [10/17] Performing build step for 'project_googlebenchmark' [1/22] Building CXX object src/CMakeFiles/benchmark.dir/colorprint.cc.o [2/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark_name.cc.o -[3/22] Building CXX object src/CMakeFiles/benchmark_main.dir/benchmark_main.cc.o -[4/22] Building CXX object src/CMakeFiles/benchmark.dir/sleep.cc.o -[5/22] Building CXX object src/CMakeFiles/benchmark.dir/perf_counters.cc.o -[6/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark_api_internal.cc.o -[7/22] Building CXX object src/CMakeFiles/benchmark.dir/counter.cc.o -[8/22] Building CXX object src/CMakeFiles/benchmark.dir/console_reporter.cc.o -[9/22] Building CXX object src/CMakeFiles/benchmark.dir/timers.cc.o +[3/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark_api_internal.cc.o +[4/22] Building CXX object src/CMakeFiles/benchmark_main.dir/benchmark_main.cc.o +[5/22] Building CXX object src/CMakeFiles/benchmark.dir/sleep.cc.o +[6/22] Building CXX object src/CMakeFiles/benchmark.dir/counter.cc.o +[7/22] Building CXX object src/CMakeFiles/benchmark.dir/timers.cc.o +[8/22] Building CXX object src/CMakeFiles/benchmark.dir/commandlineflags.cc.o +[9/22] Building CXX object src/CMakeFiles/benchmark.dir/perf_counters.cc.o [10/22] Building CXX object src/CMakeFiles/benchmark.dir/reporter.cc.o -[11/22] Building 
CXX object src/CMakeFiles/benchmark.dir/commandlineflags.cc.o +[11/22] Building CXX object src/CMakeFiles/benchmark.dir/csv_reporter.cc.o [12/22] Building CXX object src/CMakeFiles/benchmark.dir/complexity.cc.o -[13/22] Building CXX object src/CMakeFiles/benchmark.dir/csv_reporter.cc.o -[14/22] Building CXX object src/CMakeFiles/benchmark.dir/string_util.cc.o +[13/22] Building CXX object src/CMakeFiles/benchmark.dir/string_util.cc.o +[14/22] Building CXX object src/CMakeFiles/benchmark.dir/console_reporter.cc.o [15/22] Building CXX object src/CMakeFiles/benchmark.dir/json_reporter.cc.o -[16/22] Building CXX object src/CMakeFiles/benchmark.dir/sysinfo.cc.o -[17/22] Building CXX object src/CMakeFiles/benchmark.dir/statistics.cc.o +[16/22] Building CXX object src/CMakeFiles/benchmark.dir/statistics.cc.o +[17/22] Building CXX object src/CMakeFiles/benchmark.dir/sysinfo.cc.o [18/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark_runner.cc.o [19/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark.cc.o [20/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark_register.cc.o @@ -175,11 +176,11 @@ Call Stack (most recent call first): [12/17] No test step for 'project_googlebenchmark' [13/17] Completed 'project_googlebenchmark' [14/17] Building CXX object benchmarks/Vectorization/CMakeFiles/vectorization-matrix-benchmark.dir/Main.cpp.o -[15/17] Building CXX object benchmarks/Vectorization/CMakeFiles/vectorization-matrix-benchmark.dir/MLIRMatMulBenchmark.cpp.o -[16/17] Building CXX object benchmarks/Vectorization/CMakeFiles/vectorization-matrix-benchmark.dir/MLIRMatVecBenchmark.cpp.o +[15/17] Building CXX object benchmarks/Vectorization/CMakeFiles/vectorization-matrix-benchmark.dir/MLIRMatVecBenchmark.cpp.o +[16/17] Building CXX object benchmarks/Vectorization/CMakeFiles/vectorization-matrix-benchmark.dir/MLIRMatMulBenchmark.cpp.o [17/17] Linking CXX executable bin/vectorization-matrix-benchmark [Info] Running vectorization-matrix-benchmark... 
-2025-05-25T16:08:56+00:00 +2025-05-25T19:53:07+00:00 Running ./vectorization-matrix-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -187,13 +188,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 0.25, 6.84, 12.60 +Load Average: 0.33, 0.10, 1.62 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. -------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------- -MLIR_MatMul/1 19.5 ns 19.5 ns 36611256 -MLIR_MatVec/1 20.7 ns 20.7 ns 34849004 +MLIR_MatMul/1 18.7 ns 18.7 ns 37229964 +MLIR_MatVec/1 20.5 ns 20.5 ns 34308357 -------------------------------------------------------- MLIR_MatMul: MLIR MatMul Operation + Nested Loop [ 18 18 18 18 18 18 18 18 18 18 ] From 1684057a1dfdfa795748adc4681ffd54d02a8b55 Mon Sep 17 00:00:00 2001 From: Q Liu <52538137+LIUQyou@users.noreply.github.com> Date: Mon, 26 May 2025 16:33:08 +0200 Subject: [PATCH 11/52] ci: run benchmarks inside reusable docker container --- .github/workflows/bench.yml | 2 +- .../build_results_crosscompile_summary.html | 30 + site/deeplearning/build_results_summary.html | 30 + site/deeplearning/dl-layer-ffn-benchmark.html | 19 + .../dl-layer-rmsnorm-benchmark.html | 19 + .../dl-layer-selfattention-benchmark.html | 19 + .../dl-model-lenet-benchmark.html | 20 + .../dl-model-mobilenetv3-benchmark.html | 20 + .../dl-model-resnet18-benchmark.html | 19 + .../dl-model-tinyllama-benchmark.html | 20 + .../dl-model-whisper-benchmark.html | 20 + .../dl-op-linalg-arithaddf-benchmark.html | 20 + .../dl-op-linalg-arithdivf-benchmark.html | 20 + .../dl-op-linalg-arithmulf-benchmark.html | 20 + .../dl-op-linalg-arithnegf-benchmark.html | 20 + .../dl-op-linalg-arithsubf-benchmark.html | 20 + .../dl-op-linalg-batch-matmul-benchmark.html | 26 + ...-op-linalg-conv2d-nchw-fchw-benchmark.html | 20 + 
...-op-linalg-conv2d-nhwc-fhwc-benchmark.html | 22 + ...-op-linalg-conv2d-nhwc-hwcf-benchmark.html | 20 + ...-depthwise-conv-2d-nhwc-hwc-benchmark.html | 20 + .../dl-op-linalg-mathexp-benchmark.html | 20 + .../dl-op-linalg-mathfpow-benchmark.html | 20 + .../dl-op-linalg-mathrsqrt-benchmark.html | 20 + .../dl-op-linalg-matmul-benchmark.html | 23 + ...-op-linalg-pooling-nhwc-sum-benchmark.html | 20 + .../dl-op-linalg-reduceaddf-benchmark.html | 11 + .../dl-op-linalg-reducemaxf-benchmark.html | 11 + ...-linalg-softmax-exp-sum-div-benchmark.html | 20 + .../dl-op-matmul-transpose-b-benchmark.html | 22 + .../dl-op-tosa-transpose-benchmark.html | 18 + site/deeplearning/run_results_summary.html | 30 + site/geminiprocessing/build.html | 656 ++++++++++++++++++ site/geminiprocessing/cmake_configure.html | 38 + site/index.html | 34 + site/vectorization/vectorization_result.html | 204 ++++++ .../vectorization/vectorization_result.log | 40 +- 37 files changed, 1592 insertions(+), 21 deletions(-) create mode 100644 site/deeplearning/build_results_crosscompile_summary.html create mode 100644 site/deeplearning/build_results_summary.html create mode 100644 site/deeplearning/dl-layer-ffn-benchmark.html create mode 100644 site/deeplearning/dl-layer-rmsnorm-benchmark.html create mode 100644 site/deeplearning/dl-layer-selfattention-benchmark.html create mode 100644 site/deeplearning/dl-model-lenet-benchmark.html create mode 100644 site/deeplearning/dl-model-mobilenetv3-benchmark.html create mode 100644 site/deeplearning/dl-model-resnet18-benchmark.html create mode 100644 site/deeplearning/dl-model-tinyllama-benchmark.html create mode 100644 site/deeplearning/dl-model-whisper-benchmark.html create mode 100644 site/deeplearning/dl-op-linalg-arithaddf-benchmark.html create mode 100644 site/deeplearning/dl-op-linalg-arithdivf-benchmark.html create mode 100644 site/deeplearning/dl-op-linalg-arithmulf-benchmark.html create mode 100644 site/deeplearning/dl-op-linalg-arithnegf-benchmark.html 
create mode 100644 site/deeplearning/dl-op-linalg-arithsubf-benchmark.html create mode 100644 site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html create mode 100644 site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html create mode 100644 site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html create mode 100644 site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html create mode 100644 site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html create mode 100644 site/deeplearning/dl-op-linalg-mathexp-benchmark.html create mode 100644 site/deeplearning/dl-op-linalg-mathfpow-benchmark.html create mode 100644 site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html create mode 100644 site/deeplearning/dl-op-linalg-matmul-benchmark.html create mode 100644 site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html create mode 100644 site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html create mode 100644 site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html create mode 100644 site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html create mode 100644 site/deeplearning/dl-op-matmul-transpose-b-benchmark.html create mode 100644 site/deeplearning/dl-op-tosa-transpose-benchmark.html create mode 100644 site/deeplearning/run_results_summary.html create mode 100644 site/geminiprocessing/build.html create mode 100644 site/geminiprocessing/cmake_configure.html create mode 100644 site/index.html create mode 100644 site/vectorization/vectorization_result.html diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml index cbb17342..ba1b0ea1 100755 --- a/.github/workflows/bench.yml +++ b/.github/workflows/bench.yml @@ -48,7 +48,7 @@ jobs: - name: Build mini-site working-directory: /home/quliu/buddy-complier-workspace/buddy-benchmark run: | - python scripts/logs2html.py test_result site + python3 scripts/logs2html.py test_result site - name: Upload site artifact uses: actions/upload-pages-artifact@v3 diff 
--git a/site/deeplearning/build_results_crosscompile_summary.html b/site/deeplearning/build_results_crosscompile_summary.html new file mode 100644 index 00000000..d7779793 --- /dev/null +++ b/site/deeplearning/build_results_crosscompile_summary.html @@ -0,0 +1,30 @@ +

    deeplearning/build_results_crosscompile_summary.log

    2025-05-26 14:27:29 UTC

    [Failed]  Build of 'dl-model-tinyllama-benchmark'
    +[Failed]  Build of 'dl-model-mobilenetv3-benchmark'
    +[Success] Build of 'dl-model-lenet-benchmark'
    +[Failed]  Build of 'dl-model-bert-benchmark'
    +[Failed]  Build of 'dl-model-whisper-benchmark'
    +[Failed]  Build of 'dl-model-resnet18-benchmark'
    +[Success] Build of 'dl-layer-ffn-benchmark'
    +[Success] Build of 'dl-layer-selfattention-benchmark'
    +[Success] Build of 'dl-layer-rmsnorm-benchmark'
    +[Failed]  Build of 'dl-op-linalg-matmul-benchmark'
    +[Success] Build of 'dl-op-linalg-conv2d-nchw-fchw-benchmark'
    +[Success] Build of 'dl-op-linalg-conv2d-nhwc-hwcf-benchmark'
    +[Success] Build of 'dl-op-linalg-conv2d-nhwc-fhwc-benchmark'
    +[Success] Build of 'dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark'
    +[Success] Build of 'dl-op-linalg-pooling-nhwc-sum-benchmark'
    +[Failed]  Build of 'dl-op-linalg-batch-matmul-benchmark'
    +[Success] Build of 'dl-op-linalg-arithaddf-benchmark'
    +[Success] Build of 'dl-op-linalg-arithdivf-benchmark'
    +[Success] Build of 'dl-op-linalg-arithmulf-benchmark'
    +[Success] Build of 'dl-op-linalg-arithnegf-benchmark'
    +[Success] Build of 'dl-op-linalg-arithsubf-benchmark'
    +[Success] Build of 'dl-op-linalg-mathfpow-benchmark'
    +[Success] Build of 'dl-op-linalg-mathrsqrt-benchmark'
    +[Success] Build of 'dl-op-linalg-mathexp-benchmark'
    +[Success] Build of 'dl-op-linalg-reduceaddf-benchmark'
    +[Success] Build of 'dl-op-linalg-reducemaxf-benchmark'
    +[Success] Build of 'dl-op-linalg-softmax-exp-sum-div-benchmark'
    +[Failed]  Build of 'dl-op-tosa-transpose-benchmark'
    +[Failed]  Build of 'dl-op-matmul-transpose-b-benchmark'
    +
    \ No newline at end of file diff --git a/site/deeplearning/build_results_summary.html b/site/deeplearning/build_results_summary.html new file mode 100644 index 00000000..d3236314 --- /dev/null +++ b/site/deeplearning/build_results_summary.html @@ -0,0 +1,30 @@ +

    deeplearning/build_results_summary.log

    2025-05-26 14:27:29 UTC

    [Success] Build of 'dl-model-tinyllama-benchmark'
    +[Success] Build of 'dl-model-mobilenetv3-benchmark'
    +[Success] Build of 'dl-model-lenet-benchmark'
    +[Failed]  Build of 'dl-model-bert-benchmark'
    +[Success] Build of 'dl-model-whisper-benchmark'
    +[Success] Build of 'dl-model-resnet18-benchmark'
    +[Success] Build of 'dl-layer-ffn-benchmark'
    +[Success] Build of 'dl-layer-selfattention-benchmark'
    +[Success] Build of 'dl-layer-rmsnorm-benchmark'
    +[Success] Build of 'dl-op-linalg-matmul-benchmark'
    +[Success] Build of 'dl-op-linalg-conv2d-nchw-fchw-benchmark'
    +[Success] Build of 'dl-op-linalg-conv2d-nhwc-hwcf-benchmark'
    +[Success] Build of 'dl-op-linalg-conv2d-nhwc-fhwc-benchmark'
    +[Success] Build of 'dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark'
    +[Success] Build of 'dl-op-linalg-pooling-nhwc-sum-benchmark'
    +[Success] Build of 'dl-op-linalg-batch-matmul-benchmark'
    +[Success] Build of 'dl-op-linalg-arithaddf-benchmark'
    +[Success] Build of 'dl-op-linalg-arithdivf-benchmark'
    +[Success] Build of 'dl-op-linalg-arithmulf-benchmark'
    +[Success] Build of 'dl-op-linalg-arithnegf-benchmark'
    +[Success] Build of 'dl-op-linalg-arithsubf-benchmark'
    +[Success] Build of 'dl-op-linalg-mathfpow-benchmark'
    +[Success] Build of 'dl-op-linalg-mathrsqrt-benchmark'
    +[Success] Build of 'dl-op-linalg-mathexp-benchmark'
    +[Success] Build of 'dl-op-linalg-reduceaddf-benchmark'
    +[Success] Build of 'dl-op-linalg-reducemaxf-benchmark'
    +[Success] Build of 'dl-op-linalg-softmax-exp-sum-div-benchmark'
    +[Success] Build of 'dl-op-tosa-transpose-benchmark'
    +[Success] Build of 'dl-op-matmul-transpose-b-benchmark'
    +
    \ No newline at end of file diff --git a/site/deeplearning/dl-layer-ffn-benchmark.html b/site/deeplearning/dl-layer-ffn-benchmark.html new file mode 100644 index 00000000..7d027ff7 --- /dev/null +++ b/site/deeplearning/dl-layer-ffn-benchmark.html @@ -0,0 +1,19 @@ +

    deeplearning/dl-layer-ffn-benchmark.log

    2025-05-26 14:27:29 UTC

    2025-05-25T16:33:30+00:00
    +Running ./dl-layer-ffn-benchmark
    +Run on (24 X 5100 MHz CPU s)
    +CPU Caches:
    +  L1 Data 48 KiB (x12)
    +  L1 Instruction 32 KiB (x12)
    +  L2 Unified 1280 KiB (x12)
    +  L3 Unified 30720 KiB (x1)
    +Load Average: 1.00, 1.14, 3.58
    +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
    +--------------------------------------------------------------------------
    +Benchmark                                Time             CPU   Iterations
    +--------------------------------------------------------------------------
    +DL_LAYER_FFN/Scalar                  0.065 ms        0.065 ms        10714
    +DL_LAYER_FFN/Auto_Vectorization      0.027 ms        0.027 ms        25753
    +-----------------------------------------------------------
    +Correctness Verification: PASS
    +-----------------------------------------------------------
    +
    \ No newline at end of file diff --git a/site/deeplearning/dl-layer-rmsnorm-benchmark.html b/site/deeplearning/dl-layer-rmsnorm-benchmark.html new file mode 100644 index 00000000..cf717809 --- /dev/null +++ b/site/deeplearning/dl-layer-rmsnorm-benchmark.html @@ -0,0 +1,19 @@ +

    deeplearning/dl-layer-rmsnorm-benchmark.log

    2025-05-26 14:27:29 UTC

    2025-05-25T16:33:34+00:00
    +Running ./dl-layer-rmsnorm-benchmark
    +Run on (24 X 5100 MHz CPU s)
    +CPU Caches:
    +  L1 Data 48 KiB (x12)
    +  L1 Instruction 32 KiB (x12)
    +  L2 Unified 1280 KiB (x12)
    +  L3 Unified 30720 KiB (x1)
    +Load Average: 1.00, 1.13, 3.57
    +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
    +------------------------------------------------------------------------------
    +Benchmark                                    Time             CPU   Iterations
    +------------------------------------------------------------------------------
    +DL_LAYER_RMSNORM/Scalar                  0.002 ms        0.002 ms       360260
    +DL_LAYER_RMSNORM/Auto_Vectorization      0.001 ms        0.001 ms       748474
    +-----------------------------------------------------------
    +Correctness Verification: PASS
    +-----------------------------------------------------------
    +
    \ No newline at end of file diff --git a/site/deeplearning/dl-layer-selfattention-benchmark.html b/site/deeplearning/dl-layer-selfattention-benchmark.html new file mode 100644 index 00000000..48d5e9be --- /dev/null +++ b/site/deeplearning/dl-layer-selfattention-benchmark.html @@ -0,0 +1,19 @@ +

    deeplearning/dl-layer-selfattention-benchmark.log

    2025-05-26 14:27:29 UTC

    2025-05-25T16:33:32+00:00
    +Running ./dl-layer-selfattention-benchmark
    +Run on (24 X 5100 MHz CPU s)
    +CPU Caches:
    +  L1 Data 48 KiB (x12)
    +  L1 Instruction 32 KiB (x12)
    +  L2 Unified 1280 KiB (x12)
    +  L3 Unified 30720 KiB (x1)
    +Load Average: 1.00, 1.14, 3.58
    +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
    +--------------------------------------------------------------------------------
    +Benchmark                                      Time             CPU   Iterations
    +--------------------------------------------------------------------------------
    +DL_LAYER_ATTENTION/Scalar                   4.68 ms         4.68 ms          150
    +DL_LAYER_ATTENTION/Auto_Vectorization       1.57 ms         1.57 ms          446
    +-----------------------------------------------------------
    +Correctness Verification: PASS
    +-----------------------------------------------------------
    +
    \ No newline at end of file diff --git a/site/deeplearning/dl-model-lenet-benchmark.html b/site/deeplearning/dl-model-lenet-benchmark.html new file mode 100644 index 00000000..9011db4e --- /dev/null +++ b/site/deeplearning/dl-model-lenet-benchmark.html @@ -0,0 +1,20 @@ +

    deeplearning/dl-model-lenet-benchmark.log

    2025-05-26 14:27:29 UTC

    2025-05-25T16:29:36+00:00
    +Running ./dl-model-lenet-benchmark
    +Run on (24 X 5100 MHz CPU s)
    +CPU Caches:
    +  L1 Data 48 KiB (x12)
    +  L1 Instruction 32 KiB (x12)
    +  L2 Unified 1280 KiB (x12)
    +  L3 Unified 30720 KiB (x1)
    +Load Average: 1.00, 1.32, 4.34
    +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
    +-----------------------------------------------------------------------------
    +Benchmark                                   Time             CPU   Iterations
    +-----------------------------------------------------------------------------
    +DL_MODEL_LENET/Auto_Vectorization       0.152 ms        0.152 ms         4530
    +DL_MODEL_LENET/Buddy_Vectorization      0.136 ms        0.136 ms         5149
    +-----------------------------------------------------------
    +Correctness Verification:
    +Transform case: PASS
    +-----------------------------------------------------------
    +
    \ No newline at end of file diff --git a/site/deeplearning/dl-model-mobilenetv3-benchmark.html b/site/deeplearning/dl-model-mobilenetv3-benchmark.html new file mode 100644 index 00000000..7109b91f --- /dev/null +++ b/site/deeplearning/dl-model-mobilenetv3-benchmark.html @@ -0,0 +1,20 @@ +

    deeplearning/dl-model-mobilenetv3-benchmark.log

    2025-05-26 14:27:29 UTC

    2025-05-25T16:29:34+00:00
    +Running ./dl-model-mobilenetv3-benchmark
    +Run on (24 X 5100 MHz CPU s)
    +CPU Caches:
    +  L1 Data 48 KiB (x12)
    +  L1 Instruction 32 KiB (x12)
    +  L2 Unified 1280 KiB (x12)
    +  L3 Unified 30720 KiB (x1)
    +Load Average: 1.00, 1.32, 4.34
    +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
    +-----------------------------------------------------------------------------------
    +Benchmark                                         Time             CPU   Iterations
    +-----------------------------------------------------------------------------------
    +BM_MobileNet_V3/BM_MobileNet_V3_scalar         35.1 ms         35.1 ms           20
    +BM_MobileNet_V3/BM_MobileNet_V3_conv_opt       32.0 ms         32.0 ms           22
    +-----------------------------------------------------------
    +Correctness Verification:
    +Transform case: PASS
    +-----------------------------------------------------------
    +
    \ No newline at end of file diff --git a/site/deeplearning/dl-model-resnet18-benchmark.html b/site/deeplearning/dl-model-resnet18-benchmark.html new file mode 100644 index 00000000..194a6cc9 --- /dev/null +++ b/site/deeplearning/dl-model-resnet18-benchmark.html @@ -0,0 +1,19 @@ +

    deeplearning/dl-model-resnet18-benchmark.log

    2025-05-26 14:27:29 UTC

    2025-05-25T16:33:27+00:00
    +Running ./dl-model-resnet18-benchmark
    +Run on (24 X 5100 MHz CPU s)
    +CPU Caches:
    +  L1 Data 48 KiB (x12)
    +  L1 Instruction 32 KiB (x12)
    +  L2 Unified 1280 KiB (x12)
    +  L3 Unified 30720 KiB (x1)
    +Load Average: 1.00, 1.14, 3.59
    +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
    +--------------------------------------------------------------------------------
    +Benchmark                                      Time             CPU   Iterations
    +--------------------------------------------------------------------------------
    +DL_MODEL_Resnet18/Auto_Vectorization         720 ms          720 ms            1
    +DL_MODEL_Resnet18/Buddy_Vectorization        719 ms          719 ms            1
    +-----------------------------------------------------------
    +Correctness Verification: PASS
    +-----------------------------------------------------------
    +
    \ No newline at end of file diff --git a/site/deeplearning/dl-model-tinyllama-benchmark.html b/site/deeplearning/dl-model-tinyllama-benchmark.html new file mode 100644 index 00000000..8e18ec1d --- /dev/null +++ b/site/deeplearning/dl-model-tinyllama-benchmark.html @@ -0,0 +1,20 @@ +

    deeplearning/dl-model-tinyllama-benchmark.log

    2025-05-26 14:27:29 UTC

    2025-05-25T16:24:01+00:00
    +Running ./dl-model-tinyllama-benchmark
    +Run on (24 X 5100 MHz CPU s)
    +CPU Caches:
    +  L1 Data 48 KiB (x12)
    +  L1 Instruction 32 KiB (x12)
    +  L2 Unified 1280 KiB (x12)
    +  L3 Unified 30720 KiB (x1)
    +Load Average: 1.79, 2.00, 5.81
    +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
    +----------------------------------------------------------------------------
    +Benchmark                                  Time             CPU   Iterations
    +----------------------------------------------------------------------------
    +DL_MODEL_TINYLLAMA/scalar             160502 ms       160495 ms            1
    +DL_MODEL_TINYLLAMA/matmul_opt           9595 ms         9595 ms            1
    +DL_MODEL_TINYLLAMA/matmul_opt_omp       7607 ms         6928 ms            1
    +---------- Verification ----------
    +matmul_opt PASS
    +matmul_opt_omp PASS
    +
    \ No newline at end of file diff --git a/site/deeplearning/dl-model-whisper-benchmark.html b/site/deeplearning/dl-model-whisper-benchmark.html new file mode 100644 index 00000000..75b3b90d --- /dev/null +++ b/site/deeplearning/dl-model-whisper-benchmark.html @@ -0,0 +1,20 @@ +

    deeplearning/dl-model-whisper-benchmark.log

    2025-05-26 14:27:29 UTC

    2025-05-25T16:29:38+00:00
    +Running ./dl-model-whisper-benchmark
    +Run on (24 X 5100 MHz CPU s)
    +CPU Caches:
    +  L1 Data 48 KiB (x12)
    +  L1 Instruction 32 KiB (x12)
    +  L2 Unified 1280 KiB (x12)
    +  L3 Unified 30720 KiB (x1)
    +Load Average: 1.00, 1.31, 4.32
    +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
    +-------------------------------------------------------------------------------
    +Benchmark                                     Time             CPU   Iterations
    +-------------------------------------------------------------------------------
    +DL_MODEL_Whisper/Auto_Vectorization       77089 ms        77086 ms            1
    +DL_MODEL_Whisper/Buddy_Vectorization      35954 ms        35953 ms            1
    +-----------------------------------------------------------
    +Correctness Verification for Output1: PASS
    +Correctness Verification for Output2: FAIL
    +-----------------------------------------------------------
    +
    \ No newline at end of file diff --git a/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html b/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html new file mode 100644 index 00000000..cb45e65e --- /dev/null +++ b/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html @@ -0,0 +1,20 @@ +

    deeplearning/dl-op-linalg-arithaddf-benchmark.log

    2025-05-26 14:27:29 UTC

    2025-05-25T16:34:03+00:00
    +Running ./dl-op-linalg-arithaddf-benchmark
    +Run on (24 X 5100 MHz CPU s)
    +CPU Caches:
    +  L1 Data 48 KiB (x12)
    +  L1 Instruction 32 KiB (x12)
    +  L2 Unified 1280 KiB (x12)
    +  L3 Unified 30720 KiB (x1)
    +Load Average: 1.00, 1.12, 3.48
    +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
    +--------------------------------------------------------------------
    +Benchmark                          Time             CPU   Iterations
    +--------------------------------------------------------------------
    +BM_ADDF_SCALAR                 0.030 ms        0.030 ms        23576
    +BM_ADDF_AutoVectorization      0.004 ms        0.004 ms       174965
    +-----------------------------------------------------------
    +Correctness Verification:
    +Transform case: PASS
    +-----------------------------------------------------------
    +
    \ No newline at end of file diff --git a/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html b/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html new file mode 100644 index 00000000..0e383307 --- /dev/null +++ b/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html @@ -0,0 +1,20 @@ +

    deeplearning/dl-op-linalg-arithdivf-benchmark.log

    2025-05-26 14:27:29 UTC

    2025-05-25T16:34:05+00:00
    +Running ./dl-op-linalg-arithdivf-benchmark
    +Run on (24 X 5100 MHz CPU s)
    +CPU Caches:
    +  L1 Data 48 KiB (x12)
    +  L1 Instruction 32 KiB (x12)
    +  L2 Unified 1280 KiB (x12)
    +  L3 Unified 30720 KiB (x1)
    +Load Average: 1.00, 1.12, 3.48
    +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
    +--------------------------------------------------------------------
    +Benchmark                          Time             CPU   Iterations
    +--------------------------------------------------------------------
    +BM_DIVF_SCALAR                 0.030 ms        0.030 ms        23149
    +BM_DIVF_AutoVectorization      0.009 ms        0.009 ms        73790
    +-----------------------------------------------------------
    +Correctness Verification:
    +Transform case: PASS
    +-----------------------------------------------------------
    +
    \ No newline at end of file diff --git a/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html b/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html new file mode 100644 index 00000000..9f5648c2 --- /dev/null +++ b/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html @@ -0,0 +1,20 @@ +

    deeplearning/dl-op-linalg-arithmulf-benchmark.log

    2025-05-26 14:27:29 UTC

    2025-05-25T16:34:07+00:00
    +Running ./dl-op-linalg-arithmulf-benchmark
    +Run on (24 X 5100 MHz CPU s)
    +CPU Caches:
    +  L1 Data 48 KiB (x12)
    +  L1 Instruction 32 KiB (x12)
    +  L2 Unified 1280 KiB (x12)
    +  L3 Unified 30720 KiB (x1)
    +Load Average: 1.00, 1.12, 3.48
    +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
    +--------------------------------------------------------------------
    +Benchmark                          Time             CPU   Iterations
    +--------------------------------------------------------------------
    +BM_MULF_SCALAR                 0.030 ms        0.030 ms        23959
    +BM_MULF_AutoVectorization      0.004 ms        0.004 ms       175122
    +-----------------------------------------------------------
    +Correctness Verification:
    +Transform case: PASS
    +-----------------------------------------------------------
    +
    \ No newline at end of file diff --git a/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html b/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html new file mode 100644 index 00000000..17661c41 --- /dev/null +++ b/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html @@ -0,0 +1,20 @@ +

    deeplearning/dl-op-linalg-arithnegf-benchmark.log

    2025-05-26 14:27:29 UTC

    2025-05-25T16:34:09+00:00
    +Running ./dl-op-linalg-arithnegf-benchmark
    +Run on (24 X 5100 MHz CPU s)
    +CPU Caches:
    +  L1 Data 48 KiB (x12)
    +  L1 Instruction 32 KiB (x12)
    +  L2 Unified 1280 KiB (x12)
    +  L3 Unified 30720 KiB (x1)
    +Load Average: 1.00, 1.12, 3.47
    +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
    +--------------------------------------------------------------------
    +Benchmark                          Time             CPU   Iterations
    +--------------------------------------------------------------------
    +BM_NEGF_SCALAR                 0.023 ms        0.023 ms        30704
    +BM_NEGF_AutoVectorization      0.003 ms        0.003 ms       212512
    +-----------------------------------------------------------
    +Correctness Verification:
    +Transform case: PASS
    +-----------------------------------------------------------
    +
    \ No newline at end of file diff --git a/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html b/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html new file mode 100644 index 00000000..c8ce28ec --- /dev/null +++ b/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html @@ -0,0 +1,20 @@ +

    deeplearning/dl-op-linalg-arithsubf-benchmark.log

    2025-05-26 14:27:29 UTC

    2025-05-25T16:34:11+00:00
    +Running ./dl-op-linalg-arithsubf-benchmark
    +Run on (24 X 5100 MHz CPU s)
    +CPU Caches:
    +  L1 Data 48 KiB (x12)
    +  L1 Instruction 32 KiB (x12)
    +  L2 Unified 1280 KiB (x12)
    +  L3 Unified 30720 KiB (x1)
    +Load Average: 1.00, 1.12, 3.47
    +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
    +--------------------------------------------------------------------
    +Benchmark                          Time             CPU   Iterations
    +--------------------------------------------------------------------
    +BM_SUBF_SCALAR                 0.030 ms        0.030 ms        23752
    +BM_SUBF_AutoVectorization      0.005 ms        0.005 ms       174941
    +-----------------------------------------------------------
    +Correctness Verification:
    +Transform case: PASS
    +-----------------------------------------------------------
    +
    \ No newline at end of file diff --git a/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html b/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html new file mode 100644 index 00000000..d880d8ee --- /dev/null +++ b/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html @@ -0,0 +1,26 @@ +

    deeplearning/dl-op-linalg-batch-matmul-benchmark.log

    2025-05-26 14:27:29 UTC

    2025-05-25T16:33:53+00:00
    +Running ./dl-op-linalg-batch-matmul-benchmark
    +Run on (24 X 5100 MHz CPU s)
    +CPU Caches:
    +  L1 Data 48 KiB (x12)
    +  L1 Instruction 32 KiB (x12)
    +  L2 Unified 1280 KiB (x12)
    +  L3 Unified 30720 KiB (x1)
    +Load Average: 1.00, 1.12, 3.51
    +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
    +---------------------------------------------------------------------------------------------
    +Benchmark                                                   Time             CPU   Iterations
    +---------------------------------------------------------------------------------------------
    +DL_OPS_BATCH_MATMUL/Scalar/iterations:1                  3525 ms         3525 ms            1
    +DL_OPS_BATCH_MATMUL/AutoVectorization/iterations:1        974 ms          974 ms            1
    +DL_OPS_BATCH_MATMUL/Vectorization/iterations:1            190 ms          190 ms            1
    +DL_OPS_BATCH_MATMUL/Tile/iterations:1                     109 ms          109 ms            1
    +DL_OPS_BATCH_MATMUL/SCF/iterations:1                      117 ms          117 ms            1
    +DL_OPS_BATCH_MATMUL/BROADCAST/iterations:1                352 ms          352 ms            1
    +DL_OPS_BATCH_MATMUL/BROADCAST_OMP/iterations:1           80.7 ms         53.0 ms            1
    +---------- Verification ----------
    +Tile PASS
    +SCF PASS
    +BROADCAST PASS
    +BROADCAST_OMP PASS
    +
    \ No newline at end of file diff --git a/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html b/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html new file mode 100644 index 00000000..2b37cbba --- /dev/null +++ b/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html @@ -0,0 +1,20 @@ +

    deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.log

    2025-05-26 14:27:29 UTC

    2025-05-25T16:33:46+00:00
    +Running ./dl-op-linalg-conv2d-nchw-fchw-benchmark
    +Run on (24 X 5100 MHz CPU s)
    +CPU Caches:
    +  L1 Data 48 KiB (x12)
    +  L1 Instruction 32 KiB (x12)
    +  L2 Unified 1280 KiB (x12)
    +  L3 Unified 30720 KiB (x1)
    +Load Average: 1.00, 1.13, 3.54
    +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
    +-------------------------------------------------------------------
    +Benchmark                         Time             CPU   Iterations
    +-------------------------------------------------------------------
    +BM_Conv2DNchwFchw_SCALAR        282 ms          282 ms            2
    +BM_Conv2DNchwFchw_Im2col       8.35 ms         8.35 ms           86
    +-----------------------------------------------------------
    +Correctness Verification:
    +Transform case: PASS
    +-----------------------------------------------------------
    +
    \ No newline at end of file diff --git a/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html b/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html new file mode 100644 index 00000000..37d8f93e --- /dev/null +++ b/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html @@ -0,0 +1,22 @@ +

    deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.log

    2025-05-26 14:27:29 UTC

    2025-05-25T16:33:50+00:00
    +Running ./dl-op-linalg-conv2d-nhwc-fhwc-benchmark
    +Run on (24 X 5100 MHz CPU s)
    +CPU Caches:
    +  L1 Data 48 KiB (x12)
    +  L1 Instruction 32 KiB (x12)
    +  L2 Unified 1280 KiB (x12)
    +  L3 Unified 30720 KiB (x1)
    +Load Average: 1.00, 1.13, 3.52
    +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
    +---------------------------------------------------------------------------------------------------
    +Benchmark                                                         Time             CPU   Iterations
    +---------------------------------------------------------------------------------------------------
    +DL_OPS_CONV_2D_NHWC_FHWC/scalar/iterations:5                   72.3 ms         72.3 ms            5
    +DL_OPS_CONV_2D_NHWC_FHWC/auto_vectorization/iterations:5       9.34 ms         9.34 ms            5
    +DL_OPS_CONV_2D_NHWC_FHWC/vectorization/iterations:5            1.74 ms         1.74 ms            5
    +DL_OPS_CONV_2D_NHWC_FHWC/vec_tile/iterations:5                 1.73 ms         1.73 ms            5
    +---------- Verification ----------
    +auto_vectorization PASS
    +vectorization PASS
    +vec_tile PASS
    +
    \ No newline at end of file diff --git a/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html b/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html new file mode 100644 index 00000000..490e872c --- /dev/null +++ b/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html @@ -0,0 +1,20 @@ +

    deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.log

    2025-05-26 14:27:29 UTC

    2025-05-25T16:33:48+00:00
    +Running ./dl-op-linalg-conv2d-nhwc-hwcf-benchmark
    +Run on (24 X 5100 MHz CPU s)
    +CPU Caches:
    +  L1 Data 48 KiB (x12)
    +  L1 Instruction 32 KiB (x12)
    +  L2 Unified 1280 KiB (x12)
    +  L3 Unified 30720 KiB (x1)
    +Load Average: 1.00, 1.13, 3.52
    +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
    +---------------------------------------------------------------------------------
    +Benchmark                                       Time             CPU   Iterations
    +---------------------------------------------------------------------------------
    +BM_CONV_2D_NHWC_HWCF_SCALAR                  32.3 ms         32.3 ms           22
    +BM_CONV_2D_NHWC_HWCF_AutoVectorization       6.14 ms         6.14 ms          114
    +-----------------------------------------------------------
    +Correctness Verification:
    +Transform case: PASS
    +-----------------------------------------------------------
    +
    \ No newline at end of file diff --git a/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html b/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html new file mode 100644 index 00000000..ce8b1d43 --- /dev/null +++ b/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html @@ -0,0 +1,20 @@ +

    deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.log

    2025-05-26 14:27:29 UTC

    2025-05-25T16:33:50+00:00
    +Running ./dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark
    +Run on (24 X 5100 MHz CPU s)
    +CPU Caches:
    +  L1 Data 48 KiB (x12)
    +  L1 Instruction 32 KiB (x12)
    +  L2 Unified 1280 KiB (x12)
    +  L3 Unified 30720 KiB (x1)
    +Load Average: 1.00, 1.13, 3.52
    +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
    +------------------------------------------------------------------------------------------------------------
    +Benchmark                                                                  Time             CPU   Iterations
    +------------------------------------------------------------------------------------------------------------
    +DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/scalar/iterations:5                   6.55 ms         6.54 ms            5
    +DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/auto_vectorization/iterations:5       1.68 ms         1.68 ms            5
    +DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/vectorization/iterations:5           0.124 ms        0.124 ms            5
    +---------- Verification ----------
    +auto_vectorization PASS
    +vectorization PASS
    +
    \ No newline at end of file diff --git a/site/deeplearning/dl-op-linalg-mathexp-benchmark.html b/site/deeplearning/dl-op-linalg-mathexp-benchmark.html new file mode 100644 index 00000000..f6048848 --- /dev/null +++ b/site/deeplearning/dl-op-linalg-mathexp-benchmark.html @@ -0,0 +1,20 @@ +

    deeplearning/dl-op-linalg-mathexp-benchmark.log

    2025-05-26 14:27:29 UTC

    2025-05-25T16:34:17+00:00
    +Running ./dl-op-linalg-mathexp-benchmark
    +Run on (24 X 5100 MHz CPU s)
    +CPU Caches:
    +  L1 Data 48 KiB (x12)
    +  L1 Instruction 32 KiB (x12)
    +  L2 Unified 1280 KiB (x12)
    +  L3 Unified 30720 KiB (x1)
    +Load Average: 1.00, 1.11, 3.46
    +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
    +-------------------------------------------------------------------
    +Benchmark                         Time             CPU   Iterations
    +-------------------------------------------------------------------
    +BM_EXP_SCALAR                 0.046 ms        0.046 ms        15245
    +BM_EXP_AutoVectorization      0.031 ms        0.031 ms        22544
    +-----------------------------------------------------------
    +Correctness Verification:
    +Transform case: PASS
    +-----------------------------------------------------------
    +
    \ No newline at end of file diff --git a/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html b/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html new file mode 100644 index 00000000..0d29ac56 --- /dev/null +++ b/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html @@ -0,0 +1,20 @@ +

    deeplearning/dl-op-linalg-mathfpow-benchmark.log

    2025-05-26 14:27:29 UTC

    2025-05-25T16:34:13+00:00
    +Running ./dl-op-linalg-mathfpow-benchmark
    +Run on (24 X 5100 MHz CPU s)
    +CPU Caches:
    +  L1 Data 48 KiB (x12)
    +  L1 Instruction 32 KiB (x12)
    +  L2 Unified 1280 KiB (x12)
    +  L3 Unified 30720 KiB (x1)
    +Load Average: 1.00, 1.11, 3.46
    +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
    +--------------------------------------------------------------------
    +Benchmark                          Time             CPU   Iterations
    +--------------------------------------------------------------------
    +BM_FPOW_SCALAR                 0.084 ms        0.084 ms         8153
    +BM_FPOW_AutoVectorization      0.057 ms        0.057 ms        12317
    +-----------------------------------------------------------
    +Correctness Verification:
    +Transform case: PASS
    +-----------------------------------------------------------
    +
    \ No newline at end of file diff --git a/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html b/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html new file mode 100644 index 00000000..42e2576f --- /dev/null +++ b/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html @@ -0,0 +1,20 @@ +

    deeplearning/dl-op-linalg-mathrsqrt-benchmark.log

    2025-05-26 14:27:29 UTC

    2025-05-25T16:34:15+00:00
    +Running ./dl-op-linalg-mathrsqrt-benchmark
    +Run on (24 X 5100 MHz CPU s)
    +CPU Caches:
    +  L1 Data 48 KiB (x12)
    +  L1 Instruction 32 KiB (x12)
    +  L2 Unified 1280 KiB (x12)
    +  L3 Unified 30720 KiB (x1)
    +Load Average: 1.00, 1.11, 3.46
    +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
    +---------------------------------------------------------------------
    +Benchmark                           Time             CPU   Iterations
    +---------------------------------------------------------------------
    +BM_RSQRT_SCALAR                 0.073 ms        0.073 ms         9557
    +BM_RSQRT_AutoVectorization      0.004 ms        0.004 ms       161107
    +-----------------------------------------------------------
    +Correctness Verification:
    +Transform case: PASS
    +-----------------------------------------------------------
    +
    \ No newline at end of file diff --git a/site/deeplearning/dl-op-linalg-matmul-benchmark.html b/site/deeplearning/dl-op-linalg-matmul-benchmark.html new file mode 100644 index 00000000..7eaed5f5 --- /dev/null +++ b/site/deeplearning/dl-op-linalg-matmul-benchmark.html @@ -0,0 +1,23 @@ +

    deeplearning/dl-op-linalg-matmul-benchmark.log

    2025-05-26 14:27:29 UTC

    2025-05-25T16:33:36+00:00
    +Running ./dl-op-linalg-matmul-benchmark
    +Run on (24 X 5100 MHz CPU s)
    +CPU Caches:
    +  L1 Data 48 KiB (x12)
    +  L1 Instruction 32 KiB (x12)
    +  L2 Unified 1280 KiB (x12)
    +  L3 Unified 30720 KiB (x1)
    +Load Average: 1.00, 1.13, 3.57
    +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
    +-------------------------------------------------------------------------------
    +Benchmark                                     Time             CPU   Iterations
    +-------------------------------------------------------------------------------
    +DL_OPS_MATMUL/scalar_O0/iterations:1       3394 ms         3394 ms            1
    +DL_OPS_MATMUL/scalar_O3/iterations:1       2944 ms         2944 ms            1
    +DL_OPS_MATMUL/tile/iterations:1             120 ms          120 ms            1
    +DL_OPS_MATMUL/vec/iterations:1              139 ms          139 ms            1
    +DL_OPS_MATMUL/vec_omp/iterations:1         67.8 ms         17.8 ms            1
    +---------- Verification ----------
    +tile PASS
    +vec PASS
    +vec_omp PASS
    +
    \ No newline at end of file diff --git a/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html b/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html new file mode 100644 index 00000000..ae62ac2c --- /dev/null +++ b/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html @@ -0,0 +1,20 @@ +

    deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.log

    2025-05-26 14:27:29 UTC

    2025-05-25T16:33:51+00:00
    +Running ./dl-op-linalg-pooling-nhwc-sum-benchmark
    +Run on (24 X 5100 MHz CPU s)
    +CPU Caches:
    +  L1 Data 48 KiB (x12)
    +  L1 Instruction 32 KiB (x12)
    +  L2 Unified 1280 KiB (x12)
    +  L3 Unified 30720 KiB (x1)
    +Load Average: 1.00, 1.13, 3.52
    +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
    +--------------------------------------------------------------------------------
    +Benchmark                                      Time             CPU   Iterations
    +--------------------------------------------------------------------------------
    +BM_POOLING_NHWC_SUM_SCALAR                 0.233 ms        0.233 ms         2997
    +BM_POOLING_NHWC_SUM_AutoVectorization      0.042 ms        0.042 ms        16895
    +-----------------------------------------------------------
    +Correctness Verification:
    +Transform case: PASS
    +-----------------------------------------------------------
    +
    \ No newline at end of file diff --git a/site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html b/site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html new file mode 100644 index 00000000..22b31c55 --- /dev/null +++ b/site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html @@ -0,0 +1,11 @@ +

    deeplearning/dl-op-linalg-reduceaddf-benchmark.log

    2025-05-26 14:27:29 UTC

    2025-05-25T16:34:19+00:00
    +Running ./dl-op-linalg-reduceaddf-benchmark
    +Run on (24 X 5100 MHz CPU s)
    +CPU Caches:
    +  L1 Data 48 KiB (x12)
    +  L1 Instruction 32 KiB (x12)
    +  L2 Unified 1280 KiB (x12)
    +  L3 Unified 30720 KiB (x1)
    +Load Average: 1.00, 1.11, 3.44
    +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
    +
    \ No newline at end of file diff --git a/site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html b/site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html new file mode 100644 index 00000000..22a3fa6b --- /dev/null +++ b/site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html @@ -0,0 +1,11 @@ +

    deeplearning/dl-op-linalg-reducemaxf-benchmark.log

    2025-05-26 14:27:29 UTC

    2025-05-25T16:34:19+00:00
    +Running ./dl-op-linalg-reducemaxf-benchmark
    +Run on (24 X 5100 MHz CPU s)
    +CPU Caches:
    +  L1 Data 48 KiB (x12)
    +  L1 Instruction 32 KiB (x12)
    +  L2 Unified 1280 KiB (x12)
    +  L3 Unified 30720 KiB (x1)
    +Load Average: 1.00, 1.11, 3.44
    +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
    +
    \ No newline at end of file diff --git a/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html b/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html new file mode 100644 index 00000000..cfbec49b --- /dev/null +++ b/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html @@ -0,0 +1,20 @@ +

    deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.log

    2025-05-26 14:27:29 UTC

    2025-05-25T16:34:19+00:00
    +Running ./dl-op-linalg-softmax-exp-sum-div-benchmark
    +Run on (24 X 5100 MHz CPU s)
    +CPU Caches:
    +  L1 Data 48 KiB (x12)
    +  L1 Instruction 32 KiB (x12)
    +  L2 Unified 1280 KiB (x12)
    +  L3 Unified 30720 KiB (x1)
    +Load Average: 1.00, 1.11, 3.44
    +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
    +--------------------------------------------------------------------------------
    +Benchmark                                      Time             CPU   Iterations
    +--------------------------------------------------------------------------------
    +BM_SOFTMAXEXPSUMDIV_SCALAR                 0.006 ms        0.006 ms       123343
    +BM_SOFTMAXEXPSUMDIV_AutoVectorization      0.004 ms        0.004 ms       181973
    +-----------------------------------------------------------
    +Correctness Verification:
    +Transform case: PASS
    +-----------------------------------------------------------
    +
    \ No newline at end of file diff --git a/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html b/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html new file mode 100644 index 00000000..c673eb82 --- /dev/null +++ b/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html @@ -0,0 +1,22 @@ +

    deeplearning/dl-op-matmul-transpose-b-benchmark.log

    2025-05-26 14:27:29 UTC

    2025-05-25T16:34:22+00:00
    +Running ./dl-op-matmul-transpose-b-benchmark
    +Run on (24 X 5100 MHz CPU s)
    +CPU Caches:
    +  L1 Data 48 KiB (x12)
    +  L1 Instruction 32 KiB (x12)
    +  L2 Unified 1280 KiB (x12)
    +  L3 Unified 30720 KiB (x1)
    +Load Average: 1.00, 1.11, 3.44
    +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
    +-----------------------------------------------------------------------------------------------
    +Benchmark                                                     Time             CPU   Iterations
    +-----------------------------------------------------------------------------------------------
    +DL_OPS_MATMUL_TRANSPOSE_B/scalar_O0/iterations:5           1262 ms         1262 ms            5
    +DL_OPS_MATMUL_TRANSPOSE_B/scalar_O3/iterations:5            311 ms          311 ms            5
    +DL_OPS_MATMUL_TRANSPOSE_B/scalar_O3_omp/iterations:5       33.9 ms         22.0 ms            5
    +DL_OPS_MATMUL_TRANSPOSE_B/vec/iterations:5                 85.3 ms         85.3 ms            5
    +---------- Verification ----------
    +scalar_O3 PASS
    +scalar_O3_omp PASS
    +vec PASS
    +
    \ No newline at end of file diff --git a/site/deeplearning/dl-op-tosa-transpose-benchmark.html b/site/deeplearning/dl-op-tosa-transpose-benchmark.html new file mode 100644 index 00000000..56ac1ea7 --- /dev/null +++ b/site/deeplearning/dl-op-tosa-transpose-benchmark.html @@ -0,0 +1,18 @@ +

    deeplearning/dl-op-tosa-transpose-benchmark.log

    2025-05-26 14:27:29 UTC

    2025-05-25T16:34:22+00:00
    +Running ./dl-op-tosa-transpose-benchmark
    +Run on (24 X 5100 MHz CPU s)
    +CPU Caches:
    +  L1 Data 48 KiB (x12)
    +  L1 Instruction 32 KiB (x12)
    +  L2 Unified 1280 KiB (x12)
    +  L3 Unified 30720 KiB (x1)
    +Load Average: 1.00, 1.11, 3.44
    +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
    +-------------------------------------------------------------------------------------
    +Benchmark                                           Time             CPU   Iterations
    +-------------------------------------------------------------------------------------
    +DL_OPS_TRANSPOSE_2D/scalar_O0/iterations:5       25.6 ms         19.9 ms            5
    +DL_OPS_TRANSPOSE_2D/scalar_O3/iterations:5       19.1 ms         16.2 ms            5
    +---------- Verification ----------
    +scalar_O3 PASS
    +
    \ No newline at end of file diff --git a/site/deeplearning/run_results_summary.html b/site/deeplearning/run_results_summary.html new file mode 100644 index 00000000..687db649 --- /dev/null +++ b/site/deeplearning/run_results_summary.html @@ -0,0 +1,30 @@ +

    deeplearning/run_results_summary.log

    2025-05-26 14:27:29 UTC

    [Success] Run of 'dl-model-tinyllama-benchmark'
    +[Success] Run of 'dl-model-mobilenetv3-benchmark'
    +[Success] Run of 'dl-model-lenet-benchmark'
    +[Missing] Executable not found for 'dl-model-bert-benchmark'
    +[Success] Run of 'dl-model-whisper-benchmark'
    +[Success] Run of 'dl-model-resnet18-benchmark'
    +[Success] Run of 'dl-layer-ffn-benchmark'
    +[Success] Run of 'dl-layer-selfattention-benchmark'
    +[Success] Run of 'dl-layer-rmsnorm-benchmark'
    +[Success] Run of 'dl-op-linalg-matmul-benchmark'
    +[Success] Run of 'dl-op-linalg-conv2d-nchw-fchw-benchmark'
    +[Success] Run of 'dl-op-linalg-conv2d-nhwc-hwcf-benchmark'
    +[Success] Run of 'dl-op-linalg-conv2d-nhwc-fhwc-benchmark'
    +[Success] Run of 'dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark'
    +[Success] Run of 'dl-op-linalg-pooling-nhwc-sum-benchmark'
    +[Success] Run of 'dl-op-linalg-batch-matmul-benchmark'
    +[Success] Run of 'dl-op-linalg-arithaddf-benchmark'
    +[Success] Run of 'dl-op-linalg-arithdivf-benchmark'
    +[Success] Run of 'dl-op-linalg-arithmulf-benchmark'
    +[Success] Run of 'dl-op-linalg-arithnegf-benchmark'
    +[Success] Run of 'dl-op-linalg-arithsubf-benchmark'
    +[Success] Run of 'dl-op-linalg-mathfpow-benchmark'
    +[Success] Run of 'dl-op-linalg-mathrsqrt-benchmark'
    +[Success] Run of 'dl-op-linalg-mathexp-benchmark'
    +[Failed]  Run of 'dl-op-linalg-reduceaddf-benchmark'
    +[Failed]  Run of 'dl-op-linalg-reducemaxf-benchmark'
    +[Success] Run of 'dl-op-linalg-softmax-exp-sum-div-benchmark'
    +[Success] Run of 'dl-op-tosa-transpose-benchmark'
    +[Success] Run of 'dl-op-matmul-transpose-b-benchmark'
    +
    \ No newline at end of file diff --git a/site/geminiprocessing/build.html b/site/geminiprocessing/build.html new file mode 100644 index 00000000..742b1602 --- /dev/null +++ b/site/geminiprocessing/build.html @@ -0,0 +1,656 @@ +

    geminiprocessing/build.log

    2025-05-26 14:27:29 UTC

    [1/21] Creating directories for 'project_googlebenchmark'
    +[2/21] Building C object benchmarks/Gemmini/Ops/MatMulOp/CMakeFiles/ExoMatMul.dir/ExoMatmul.c.o
    +FAILED: benchmarks/Gemmini/Ops/MatMulOp/CMakeFiles/ExoMatMul.dir/ExoMatmul.c.o 
    +riscv64-unknown-linux-gnu-gcc  -I/home/buddy-complier-workspace/buddy-mlir/build/cmake/../../frontend/Interfaces -I/home/buddy-complier-workspace/buddy-mlir/build/cmake/../../thirdparty/include -I/home/buddy-complier-workspace/buddy-benchmark/benchmarks -I/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include -I/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/.. -I/home/xychen/buddy-mlir/frontend/Interfaces -O3 -DNDEBUG -MD -MT benchmarks/Gemmini/Ops/MatMulOp/CMakeFiles/ExoMatMul.dir/ExoMatmul.c.o -MF benchmarks/Gemmini/Ops/MatMulOp/CMakeFiles/ExoMatMul.dir/ExoMatmul.c.o.d -o benchmarks/Gemmini/Ops/MatMulOp/CMakeFiles/ExoMatMul.dir/ExoMatmul.c.o -c /home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c: In function '_exo_matmul_4':
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:28:47: error: macro "gemmini_extended_config_ex" requires 7 arguments, but only 6 given
    +   28 |   gemmini_extended_config_ex(WS, 0, 0, 1, 0, 0);
    +      |                                               ^
    +In file included from /home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:23:
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:251: note: macro "gemmini_extended_config_ex" defined here
    +  251 | #define gemmini_extended_config_ex(dataflow, sys_act, sys_shift, relu6_shift, A_stride, A_transpose, B_transpose) \
    +      | 
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:28:3: error: 'gemmini_extended_config_ex' undeclared (first use in this function)
    +   28 |   gemmini_extended_config_ex(WS, 0, 0, 1, 0, 0);
    +      |   ^~~~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:28:3: note: each undeclared identifier is reported only once for each function it appears in
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:35:18: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast]
    +   35 |   int32_t *res = (int32_t*) ((uint32_t)gemm_acc_malloc (16 * 16 * 4 * 4 * sizeof(int32_t)));
    +      |                  ^
    +In file included from /home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:20,
    +                 from /home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:23:
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:66:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
    +   66 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16));
    +      |                                  ^
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
    +  152 |         : "r"(rs1), "r"(rs2));                                                       \
    +      |               ^~~
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
    +  232 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
    +      |   ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:66:9: note: in expansion of macro 'gemmini_extended_preload'
    +   66 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16));
    +      |         ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:66:119: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
    +   66 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16));
    +      |                                                                                                                       ^
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
    +  152 |         : "r"(rs1), "r"(rs2));                                                       \
    +      |                         ^~~
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
    +  232 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
    +      |   ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:66:9: note: in expansion of macro 'gemmini_extended_preload'
    +   66 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16));
    +      |         ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:67:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
    +   67 |         gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024))/16))), ~((uint32_t)0), (16), (16), 16, 16);
    +      |                                            ^
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
    +  152 |         : "r"(rs1), "r"(rs2));                                                       \
    +      |               ^~~
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
    +  219 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED)
    +      |   ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:67:9: note: in expansion of macro 'gemmini_extended_compute_preloaded'
    +   67 |         gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024))/16))), ~((uint32_t)0), (16), (16), 16, 16);
    +      |         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:68:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
    +   68 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16));
    +      |                                  ^
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
    +  152 |         : "r"(rs1), "r"(rs2));                                                       \
    +      |               ^~~
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
    +  232 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
    +      |   ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:68:9: note: in expansion of macro 'gemmini_extended_preload'
    +   68 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16));
    +      |         ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:68:125: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
    +   68 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16));
    +      |                                                                                                                             ^
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
    +  152 |         : "r"(rs1), "r"(rs2));                                                       \
    +      |                         ^~~
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
    +  232 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
    +      |   ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:68:9: note: in expansion of macro 'gemmini_extended_preload'
    +   68 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16));
    +      |         ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:69:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
    +   69 |         gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024))/16))), ~((uint32_t)0), (16), (16), 16, 16);
    +      |                                            ^
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
    +  152 |         : "r"(rs1), "r"(rs2));                                                       \
    +      |               ^~~
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
    +  219 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED)
    +      |   ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:69:9: note: in expansion of macro 'gemmini_extended_compute_preloaded'
    +   69 |         gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024))/16))), ~((uint32_t)0), (16), (16), 16, 16);
    +      |         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:70:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
    +   70 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
    +      |                                  ^
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
    +  152 |         : "r"(rs1), "r"(rs2));                                                       \
    +      |               ^~~
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
    +  232 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
    +      |   ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:70:9: note: in expansion of macro 'gemmini_extended_preload'
    +   70 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
    +      |         ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:70:133: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
    +   70 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
    +      |                                                                                                                                     ^
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
    +  152 |         : "r"(rs1), "r"(rs2));                                                       \
    +      |                         ^~~
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
    +  232 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
    +      |   ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:70:9: note: in expansion of macro 'gemmini_extended_preload'
    +   70 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
    +      |         ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:71:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
    +   71 |         gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024))/16))), ~((uint32_t)0), (16), (16), 16, 16);
    +      |                                            ^
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
    +  152 |         : "r"(rs1), "r"(rs2));                                                       \
    +      |               ^~~
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
    +  219 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED)
    +      |   ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:71:9: note: in expansion of macro 'gemmini_extended_compute_preloaded'
    +   71 |         gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024))/16))), ~((uint32_t)0), (16), (16), 16, 16);
    +      |         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:72:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
    +   72 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
    +      |                                  ^
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
    +  152 |         : "r"(rs1), "r"(rs2));                                                       \
    +      |               ^~~
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
    +  232 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
    +      |   ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:72:9: note: in expansion of macro 'gemmini_extended_preload'
    +   72 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
    +      |         ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:72:133: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
    +   72 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
    +      |                                                                                                                                     ^
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
    +  152 |         : "r"(rs1), "r"(rs2));                                                       \
    +      |                         ^~~
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
    +  232 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
    +      |   ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:72:9: note: in expansion of macro 'gemmini_extended_preload'
    +   72 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
    +      |         ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:73:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
    +   73 |         gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024))/16))), ~((uint32_t)0), (16), (16), 16, 16);
    +      |                                            ^
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
    +  152 |         : "r"(rs1), "r"(rs2));                                                       \
    +      |               ^~~
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
    +  219 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED)
    +      |   ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:73:9: note: in expansion of macro 'gemmini_extended_compute_preloaded'
    +   73 |         gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024))/16))), ~((uint32_t)0), (16), (16), 16, 16);
    +      |         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:74:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
    +   74 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16));
    +      |                                  ^
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
    +  152 |         : "r"(rs1), "r"(rs2));                                                       \
    +      |               ^~~
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
    +  232 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
    +      |   ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:74:9: note: in expansion of macro 'gemmini_extended_preload'
    +   74 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16));
    +      |         ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:74:126: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
    +   74 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16));
    +      |                                                                                                                              ^
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
    +  152 |         : "r"(rs1), "r"(rs2));                                                       \
    +      |                         ^~~
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
    +  232 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
    +      |   ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:74:9: note: in expansion of macro 'gemmini_extended_preload'
    +   74 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16));
    +      |         ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:75:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
    +   75 |         gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + 256)/16))), ~((uint32_t)0), (16), (16), 16, 16);
    +      |                                            ^
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
    +  152 |         : "r"(rs1), "r"(rs2));                                                       \
    +      |               ^~~
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
    +  219 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED)
    +      |   ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:75:9: note: in expansion of macro 'gemmini_extended_compute_preloaded'
    +   75 |         gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + 256)/16))), ~((uint32_t)0), (16), (16), 16, 16);
    +      |         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:76:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
    +   76 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16));
    +      |                                  ^
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
    +  152 |         : "r"(rs1), "r"(rs2));                                                       \
    +      |               ^~~
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
    +  232 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
    +      |   ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:76:9: note: in expansion of macro 'gemmini_extended_preload'
    +   76 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16));
    +      |         ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:76:132: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
    +   76 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16));
    +      |                                                                                                                                    ^
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
    +  152 |         : "r"(rs1), "r"(rs2));                                                       \
    +      |                         ^~~
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
    +  232 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
    +      |   ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:76:9: note: in expansion of macro 'gemmini_extended_preload'
    +   76 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16));
    +      |         ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:77:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
    +   77 |         gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + 256)/16))), ~((uint32_t)0), (16), (16), 16, 16);
    +      |                                            ^
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
    +  152 |         : "r"(rs1), "r"(rs2));                                                       \
    +      |               ^~~
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
    +  219 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED)
    +      |   ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:77:9: note: in expansion of macro 'gemmini_extended_compute_preloaded'
    +   77 |         gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + 256)/16))), ~((uint32_t)0), (16), (16), 16, 16);
    +      |         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:78:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
    +   78 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
    +      |                                  ^
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
    +  152 |         : "r"(rs1), "r"(rs2));                                                       \
    +      |               ^~~
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
    +  232 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
    +      |   ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:78:9: note: in expansion of macro 'gemmini_extended_preload'
    +   78 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
    +      |         ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:78:140: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
    +   78 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
    +      |                                                                                                                                            ^
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
    +  152 |         : "r"(rs1), "r"(rs2));                                                       \
    +      |                         ^~~
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
    +  232 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
    +      |   ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:78:9: note: in expansion of macro 'gemmini_extended_preload'
    +   78 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
    +      |         ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:79:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
    +   79 |         gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + 256)/16))), ~((uint32_t)0), (16), (16), 16, 16);
    +      |                                            ^
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
    +  152 |         : "r"(rs1), "r"(rs2));                                                       \
    +      |               ^~~
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
    +  219 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED)
    +      |   ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:79:9: note: in expansion of macro 'gemmini_extended_compute_preloaded'
    +   79 |         gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + 256)/16))), ~((uint32_t)0), (16), (16), 16, 16);
    +      |         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:80:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
    +   80 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
    +      |                                  ^
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
    +  152 |         : "r"(rs1), "r"(rs2));                                                       \
    +      |               ^~~
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
    +  232 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
    +      |   ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:80:9: note: in expansion of macro 'gemmini_extended_preload'
    +   80 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
    +      |         ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:80:140: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
    +   80 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
    +      |                                                                                                                                            ^
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
    +  152 |         : "r"(rs1), "r"(rs2));                                                       \
    +      |                         ^~~
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
    +  232 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
    +      |   ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:80:9: note: in expansion of macro 'gemmini_extended_preload'
    +   80 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
    +      |         ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:81:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
    +   81 |         gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + 256)/16))), ~((uint32_t)0), (16), (16), 16, 16);
    +      |                                            ^
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
    +  152 |         : "r"(rs1), "r"(rs2));                                                       \
    +      |               ^~~
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
    +  219 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED)
    +      |   ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:81:9: note: in expansion of macro 'gemmini_extended_compute_preloaded'
    +   81 |         gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + 256)/16))), ~((uint32_t)0), (16), (16), 16, 16);
    +      |         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:82:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
    +   82 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16));
    +      |                                  ^
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
    +  152 |         : "r"(rs1), "r"(rs2));                                                       \
    +      |               ^~~
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
    +  232 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
    +      |   ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:82:9: note: in expansion of macro 'gemmini_extended_preload'
    +   82 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16));
    +      |         ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:82:134: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
    +   82 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16));
    +      |                                                                                                                                      ^
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
    +  152 |         : "r"(rs1), "r"(rs2));                                                       \
    +      |                         ^~~
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
    +  232 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
    +      |   ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:82:9: note: in expansion of macro 'gemmini_extended_preload'
    +   82 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16));
    +      |         ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:83:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
    +   83 |         gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (2) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16);
    +      |                                            ^
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
    +  152 |         : "r"(rs1), "r"(rs2));                                                       \
    +      |               ^~~
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
    +  219 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED)
    +      |   ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:83:9: note: in expansion of macro 'gemmini_extended_compute_preloaded'
    +   83 |         gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (2) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16);
    +      |         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:84:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
    +   84 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16));
    +      |                                  ^
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
    +  152 |         : "r"(rs1), "r"(rs2));                                                       \
    +      |               ^~~
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
    +  232 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
    +      |   ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:84:9: note: in expansion of macro 'gemmini_extended_preload'
    +   84 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16));
    +      |         ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:84:140: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
    +   84 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16));
    +      |                                                                                                                                            ^
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
    +  152 |         : "r"(rs1), "r"(rs2));                                                       \
    +      |                         ^~~
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
    +  232 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
    +      |   ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:84:9: note: in expansion of macro 'gemmini_extended_preload'
    +   84 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16));
    +      |         ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:85:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
    +   85 |         gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (2) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16);
    +      |                                            ^
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
    +  152 |         : "r"(rs1), "r"(rs2));                                                       \
    +      |               ^~~
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
    +  219 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED)
    +      |   ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:85:9: note: in expansion of macro 'gemmini_extended_compute_preloaded'
    +   85 |         gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (2) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16);
    +      |         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:86:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
    +   86 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
    +      |                                  ^
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
    +  152 |         : "r"(rs1), "r"(rs2));                                                       \
    +      |               ^~~
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
    +  232 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
    +      |   ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:86:9: note: in expansion of macro 'gemmini_extended_preload'
    +   86 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
    +      |         ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:86:148: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
    +   86 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
    +      |                                                                                                                                                    ^
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
    +  152 |         : "r"(rs1), "r"(rs2));                                                       \
    +      |                         ^~~
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
    +  232 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
    +      |   ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:86:9: note: in expansion of macro 'gemmini_extended_preload'
    +   86 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
    +      |         ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:87:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
    +   87 |         gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (2) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16);
    +      |                                            ^
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
    +  152 |         : "r"(rs1), "r"(rs2));                                                       \
    +      |               ^~~
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
    +  219 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED)
    +      |   ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:87:9: note: in expansion of macro 'gemmini_extended_compute_preloaded'
    +   87 |         gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (2) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16);
    +      |         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:88:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
    +   88 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
    +      |                                  ^
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
    +  152 |         : "r"(rs1), "r"(rs2));                                                       \
    +      |               ^~~
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
    +  232 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
    +      |   ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:88:9: note: in expansion of macro 'gemmini_extended_preload'
    +   88 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
    +      |         ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:88:148: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
    +   88 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
    +      |                                                                                                                                                    ^
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
    +  152 |         : "r"(rs1), "r"(rs2));                                                       \
    +      |                         ^~~
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
    +  232 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
    +      |   ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:88:9: note: in expansion of macro 'gemmini_extended_preload'
    +   88 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
    +      |         ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:89:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
    +   89 |         gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (2) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16);
    +      |                                            ^
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
    +  152 |         : "r"(rs1), "r"(rs2));                                                       \
    +      |               ^~~
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
    +  219 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED)
    +      |   ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:89:9: note: in expansion of macro 'gemmini_extended_compute_preloaded'
    +   89 |         gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (2) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16);
    +      |         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:90:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
    +   90 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16));
    +      |                                  ^
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
    +  152 |         : "r"(rs1), "r"(rs2));                                                       \
    +      |               ^~~
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
    +  232 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
    +      |   ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:90:9: note: in expansion of macro 'gemmini_extended_preload'
    +   90 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16));
    +      |         ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:90:134: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
    +   90 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16));
    +      |                                                                                                                                      ^
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
    +  152 |         : "r"(rs1), "r"(rs2));                                                       \
    +      |                         ^~~
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
    +  232 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
    +      |   ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:90:9: note: in expansion of macro 'gemmini_extended_preload'
    +   90 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16));
    +      |         ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:91:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
    +   91 |         gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (3) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16);
    +      |                                            ^
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
    +  152 |         : "r"(rs1), "r"(rs2));                                                       \
    +      |               ^~~
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
    +  219 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED)
    +      |   ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:91:9: note: in expansion of macro 'gemmini_extended_compute_preloaded'
    +   91 |         gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (3) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16);
    +      |         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:92:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
    +   92 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16));
    +      |                                  ^
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
    +  152 |         : "r"(rs1), "r"(rs2));                                                       \
    +      |               ^~~
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
    +  232 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
    +      |   ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:92:9: note: in expansion of macro 'gemmini_extended_preload'
    +   92 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16));
    +      |         ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:92:140: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
    +   92 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16));
    +      |                                                                                                                                            ^
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
    +  152 |         : "r"(rs1), "r"(rs2));                                                       \
    +      |                         ^~~
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
    +  232 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
    +      |   ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:92:9: note: in expansion of macro 'gemmini_extended_preload'
    +   92 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16));
    +      |         ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:93:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
    +   93 |         gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (3) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16);
    +      |                                            ^
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
    +  152 |         : "r"(rs1), "r"(rs2));                                                       \
    +      |               ^~~
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
    +  219 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED)
    +      |   ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:93:9: note: in expansion of macro 'gemmini_extended_compute_preloaded'
    +   93 |         gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (3) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16);
    +      |         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:94:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
    +   94 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
    +      |                                  ^
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
    +  152 |         : "r"(rs1), "r"(rs2));                                                       \
    +      |               ^~~
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
    +  232 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
    +      |   ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:94:9: note: in expansion of macro 'gemmini_extended_preload'
    +   94 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
    +      |         ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:94:148: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
    +   94 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
    +      |                                                                                                                                                    ^
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
    +  152 |         : "r"(rs1), "r"(rs2));                                                       \
    +      |                         ^~~
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
    +  232 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
    +      |   ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:94:9: note: in expansion of macro 'gemmini_extended_preload'
    +   94 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
    +      |         ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:95:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
    +   95 |         gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (3) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16);
    +      |                                            ^
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
    +  152 |         : "r"(rs1), "r"(rs2));                                                       \
    +      |               ^~~
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
    +  219 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED)
    +      |   ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:95:9: note: in expansion of macro 'gemmini_extended_compute_preloaded'
    +   95 |         gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (3) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16);
    +      |         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:96:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
    +   96 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
    +      |                                  ^
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
    +  152 |         : "r"(rs1), "r"(rs2));                                                       \
    +      |               ^~~
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
    +  232 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
    +      |   ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:96:9: note: in expansion of macro 'gemmini_extended_preload'
    +   96 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
    +      |         ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:96:148: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
    +   96 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
    +      |                                                                                                                                                    ^
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
    +  152 |         : "r"(rs1), "r"(rs2));                                                       \
    +      |                         ^~~
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
    +  232 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
    +      |   ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:96:9: note: in expansion of macro 'gemmini_extended_preload'
    +   96 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
    +      |         ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:97:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
    +   97 |         gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (3) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16);
    +      |                                            ^
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
    +  152 |         : "r"(rs1), "r"(rs2));                                                       \
    +      |               ^~~
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
    +  219 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED)
    +      |   ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:97:9: note: in expansion of macro 'gemmini_extended_compute_preloaded'
    +   97 |         gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (3) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16);
    +      |         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:98:89: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
    +   98 |         gemmini_extended_mvout( ((uint64_t) &C[(16 * i + 3136 * io) * (256) + 64 * j]), (uint32_t) &*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16)), (16), (16) );
    +      |                                                                                         ^
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
    +  152 |         : "r"(rs1), "r"(rs2));                                                       \
    +      |                         ^~~
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:212:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
    +  212 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, dram_addr, ((uint64_t)(rows) << (ADDR_LEN + 16)) | ((uint64_t)(cols) << ADDR_LEN) | (uint64_t)(spad_addr), k_MVOUT)
    +      |   ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:98:9: note: in expansion of macro 'gemmini_extended_mvout'
    +   98 |         gemmini_extended_mvout( ((uint64_t) &C[(16 * i + 3136 * io) * (256) + 64 * j]), (uint32_t) &*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16)), (16), (16) );
    +      |         ^~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:99:94: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
    +   99 |         gemmini_extended_mvout( ((uint64_t) &C[(16 * i + 3136 * io) * (256) + 16 + 64 * j]), (uint32_t) &*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16)), (16), (16) );
    +      |                                                                                              ^
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
    +  152 |         : "r"(rs1), "r"(rs2));                                                       \
    +      |                         ^~~
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:212:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
    +  212 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, dram_addr, ((uint64_t)(rows) << (ADDR_LEN + 16)) | ((uint64_t)(cols) << ADDR_LEN) | (uint64_t)(spad_addr), k_MVOUT)
    +      |   ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:99:9: note: in expansion of macro 'gemmini_extended_mvout'
    +   99 |         gemmini_extended_mvout( ((uint64_t) &C[(16 * i + 3136 * io) * (256) + 16 + 64 * j]), (uint32_t) &*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16)), (16), (16) );
    +      |         ^~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:100:94: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
    +  100 |         gemmini_extended_mvout( ((uint64_t) &C[(16 * i + 3136 * io) * (256) + 32 + 64 * j]), (uint32_t) &*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16)), (16), (16) );
    +      |                                                                                              ^
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
    +  152 |         : "r"(rs1), "r"(rs2));                                                       \
    +      |                         ^~~
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:212:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
    +  212 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, dram_addr, ((uint64_t)(rows) << (ADDR_LEN + 16)) | ((uint64_t)(cols) << ADDR_LEN) | (uint64_t)(spad_addr), k_MVOUT)
    +      |   ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:100:9: note: in expansion of macro 'gemmini_extended_mvout'
    +  100 |         gemmini_extended_mvout( ((uint64_t) &C[(16 * i + 3136 * io) * (256) + 32 + 64 * j]), (uint32_t) &*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16)), (16), (16) );
    +      |         ^~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:101:94: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
    +  101 |         gemmini_extended_mvout( ((uint64_t) &C[(16 * i + 3136 * io) * (256) + 48 + 64 * j]), (uint32_t) &*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16)), (16), (16) );
    +      |                                                                                              ^
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
    +  152 |         : "r"(rs1), "r"(rs2));                                                       \
    +      |                         ^~~
    +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:212:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
    +  212 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, dram_addr, ((uint64_t)(rows) << (ADDR_LEN + 16)) | ((uint64_t)(cols) << ADDR_LEN) | (uint64_t)(spad_addr), k_MVOUT)
    +      |   ^~~~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:101:9: note: in expansion of macro 'gemmini_extended_mvout'
    +  101 |         gemmini_extended_mvout( ((uint64_t) &C[(16 * i + 3136 * io) * (256) + 48 + 64 * j]), (uint32_t) &*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16)), (16), (16) );
    +      |         ^~~~~~~~~~~~~~~~~~~~~~
    +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:105:17: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
    +  105 |   gemm_acc_free((uint32_t)(res));
    +      |                 ^
    +[3/21] Generating buddy_matmul.o
    +[4/21] Building CXX object benchmarks/Gemmini/ResNet-101/CMakeFiles/CRunnerUtils.dir/CRunnerUtils.cpp.o
    +[5/21] Performing download step (git clone) for 'project_googlebenchmark'
    +Cloning into 'project_googlebenchmark'...
    +HEAD is now at f91b6b4 bump version to 1.6 in preparation for release
    +[6/21] Generating resnet-101.o
    +ninja: build stopped: subcommand failed.
    +
    \ No newline at end of file diff --git a/site/geminiprocessing/cmake_configure.html b/site/geminiprocessing/cmake_configure.html new file mode 100644 index 00000000..4c7b91ea --- /dev/null +++ b/site/geminiprocessing/cmake_configure.html @@ -0,0 +1,38 @@ +

    geminiprocessing/cmake_configure.log

    2025-05-26 14:27:29 UTC

    -- The CXX compiler identification is GNU 9.2.0
    +-- The C compiler identification is GNU 9.2.0
    +-- Detecting CXX compiler ABI info
    +-- Detecting CXX compiler ABI info - done
    +-- Check for working CXX compiler: /home/buddy-complier-workspace/chipyard/.conda-env/esp-tools/bin/riscv64-unknown-linux-gnu-g++ - skipped
    +-- Detecting CXX compile features
    +-- Detecting CXX compile features - done
    +-- Detecting C compiler ABI info
    +-- Detecting C compiler ABI info - done
    +-- Check for working C compiler: /home/buddy-complier-workspace/chipyard/.conda-env/esp-tools/bin/riscv64-unknown-linux-gnu-gcc - skipped
    +-- Detecting C compile features
    +-- Detecting C compile features - done
    +-- Configuring Target Architecture: avx512f
    +-- Configuring Target Triple: x86_64-unknown-linux-gnu
    +-- Configuring benchmarks: google
    +-- Performing Test CMAKE_HAVE_LIBC_PTHREAD
    +-- Performing Test CMAKE_HAVE_LIBC_PTHREAD - Failed
    +-- Looking for pthread_create in pthreads
    +-- Looking for pthread_create in pthreads - not found
    +-- Looking for pthread_create in pthread
    +-- Looking for pthread_create in pthread - found
    +-- Found Threads: TRUE  
    +-- Performing Test HAVE_SSE
    +-- Performing Test HAVE_SSE - Failed
    +-- 	SSE support - no
    +-- Performing Test HAVE_AVX2
    +-- Performing Test HAVE_AVX2 - Failed
    +-- 	AVX2 support - no
    +-- Performing Test HAVE_AVX512
    +-- Performing Test HAVE_AVX512 - Failed
    +-- 	AVX512 support - no
    +-- Performing Test HAVE_NEON
    +-- Performing Test HAVE_NEON - Failed
    +-- 	Arm Neon support - no
    +-- Configuring done
    +-- Generating done
    +-- Build files have been written to: /home/buddy-complier-workspace/buddy-benchmark/build
    +
    \ No newline at end of file diff --git a/site/index.html b/site/index.html new file mode 100644 index 00000000..f1071e42 --- /dev/null +++ b/site/index.html @@ -0,0 +1,34 @@ +

    Buddy-Benchmark results

    \ No newline at end of file diff --git a/site/vectorization/vectorization_result.html b/site/vectorization/vectorization_result.html new file mode 100644 index 00000000..eef7bfda --- /dev/null +++ b/site/vectorization/vectorization_result.html @@ -0,0 +1,204 @@ +

    vectorization/vectorization_result.log

    2025-05-26 14:27:29 UTC

    Vectorization Benchmark - Sun May 25 19:57:30 UTC 2025
    +[Info] Starting vectorization-matrix-benchmark build...
    +[Info] Running CMake configuration...
    +-- The CXX compiler identification is GNU 11.4.0
    +-- The C compiler identification is GNU 11.4.0
    +-- Detecting CXX compiler ABI info
    +-- Detecting CXX compiler ABI info - done
    +-- Check for working CXX compiler: /usr/bin/c++ - skipped
    +-- Detecting CXX compile features
    +-- Detecting CXX compile features - done
    +-- Detecting C compiler ABI info
    +-- Detecting C compiler ABI info - done
    +-- Check for working C compiler: /usr/bin/cc - skipped
    +-- Detecting C compile features
    +-- Detecting C compile features - done
    +-- Configuring Target Architecture: avx512f
    +-- Configuring Target Triple: x86_64-unknown-linux-gnu
    +-- Configuring benchmarks: google
    +-- Looking for pthread.h
    +-- Looking for pthread.h - found
    +-- Performing Test CMAKE_HAVE_LIBC_PTHREAD
    +-- Performing Test CMAKE_HAVE_LIBC_PTHREAD - Success
    +-- Found Threads: TRUE  
    +-- Performing Test HAVE_SSE
    +-- Performing Test HAVE_SSE - Success
    +-- 	SSE support - yes
    +-- Performing Test HAVE_AVX2
    +-- Performing Test HAVE_AVX2 - Success
    +-- 	AVX2 support - yes
    +-- Performing Test HAVE_AVX512
    +-- Performing Test HAVE_AVX512 - Failed
    +-- 	AVX512 support - no
    +-- Performing Test HAVE_NEON
    +-- Performing Test HAVE_NEON - Failed
    +-- 	Arm Neon support - no
    +-- Configuring done
    +-- Generating done
    +-- Build files have been written to: /home/buddy-complier-workspace/buddy-benchmark/build
    +[Info] Building vectorization-matrix-benchmark...
    +[1/17] Generating mlir-matmul.o
    +[2/17] Generating mlir-matvec.o
    +[3/17] Linking CXX static library benchmarks/Vectorization/libMLIRMatMul.a
    +[4/17] Linking CXX static library benchmarks/Vectorization/libMLIRMatVec.a
    +[5/17] Creating directories for 'project_googlebenchmark'
    +[6/17] Performing download step (git clone) for 'project_googlebenchmark'
    +Cloning into 'project_googlebenchmark'...
    +HEAD is now at f91b6b4 bump version to 1.6 in preparation for release
    +[7/17] No update step for 'project_googlebenchmark'
    +[8/17] No patch step for 'project_googlebenchmark'
    +[9/17] Performing configure step for 'project_googlebenchmark'
    +-- The CXX compiler identification is GNU 11.4.0
    +-- Detecting CXX compiler ABI info
    +-- Detecting CXX compiler ABI info - done
    +-- Check for working CXX compiler: /usr/bin/c++ - skipped
    +-- Detecting CXX compile features
    +-- Detecting CXX compile features - done
    +-- Failed to find LLVM FileCheck
    +-- Found Git: /usr/bin/git (found version "2.34.1") 
    +-- git version: v1.6.0 normalized to 1.6.0
    +-- Version: 1.6.0
    +-- Performing Test HAVE_CXX_FLAG_STD_CXX11
    +-- Performing Test HAVE_CXX_FLAG_STD_CXX11 - Success
    +-- Performing Test HAVE_CXX_FLAG_WALL
    +-- Performing Test HAVE_CXX_FLAG_WALL - Success
    +-- Performing Test HAVE_CXX_FLAG_WEXTRA
    +-- Performing Test HAVE_CXX_FLAG_WEXTRA - Success
    +-- Performing Test HAVE_CXX_FLAG_WSHADOW
    +-- Performing Test HAVE_CXX_FLAG_WSHADOW - Success
    +-- Performing Test HAVE_CXX_FLAG_WERROR
    +-- Performing Test HAVE_CXX_FLAG_WERROR - Success
    +-- Performing Test HAVE_CXX_FLAG_WSUGGEST_OVERRIDE
    +-- Performing Test HAVE_CXX_FLAG_WSUGGEST_OVERRIDE - Success
    +-- Performing Test HAVE_CXX_FLAG_PEDANTIC
    +-- Performing Test HAVE_CXX_FLAG_PEDANTIC - Success
    +-- Performing Test HAVE_CXX_FLAG_PEDANTIC_ERRORS
    +-- Performing Test HAVE_CXX_FLAG_PEDANTIC_ERRORS - Success
    +-- Performing Test HAVE_CXX_FLAG_WSHORTEN_64_TO_32
    +-- Performing Test HAVE_CXX_FLAG_WSHORTEN_64_TO_32 - Failed
    +-- Performing Test HAVE_CXX_FLAG_FSTRICT_ALIASING
    +-- Performing Test HAVE_CXX_FLAG_FSTRICT_ALIASING - Success
    +-- Performing Test HAVE_CXX_FLAG_WNO_DEPRECATED_DECLARATIONS
    +-- Performing Test HAVE_CXX_FLAG_WNO_DEPRECATED_DECLARATIONS - Success
    +-- Performing Test HAVE_CXX_FLAG_WNO_DEPRECATED
    +-- Performing Test HAVE_CXX_FLAG_WNO_DEPRECATED - Success
    +-- Performing Test HAVE_CXX_FLAG_WSTRICT_ALIASING
    +-- Performing Test HAVE_CXX_FLAG_WSTRICT_ALIASING - Success
    +-- Performing Test HAVE_CXX_FLAG_WD654
    +-- Performing Test HAVE_CXX_FLAG_WD654 - Failed
    +-- Performing Test HAVE_CXX_FLAG_WTHREAD_SAFETY
    +-- Performing Test HAVE_CXX_FLAG_WTHREAD_SAFETY - Failed
    +-- Performing Test HAVE_CXX_FLAG_COVERAGE
    +-- Performing Test HAVE_CXX_FLAG_COVERAGE - Success
    +-- Performing Test HAVE_STD_REGEX
    +CMake Warning at cmake/CXXFeatureCheck.cmake:43 (message):
    +  If you see build failures due to cross compilation, try setting
    +  HAVE_STD_REGEX to 0
    +Call Stack (most recent call first):
    +  CMakeLists.txt:279 (cxx_feature_check)
    +
    +
    +-- Performing Test HAVE_STD_REGEX -- success
    +-- Performing Test HAVE_GNU_POSIX_REGEX
    +-- Performing Test HAVE_GNU_POSIX_REGEX -- failed to compile
    +-- Performing Test HAVE_POSIX_REGEX
    +CMake Warning at cmake/CXXFeatureCheck.cmake:43 (message):
    +  If you see build failures due to cross compilation, try setting
    +  HAVE_POSIX_REGEX to 0
    +Call Stack (most recent call first):
    +  CMakeLists.txt:281 (cxx_feature_check)
    +
    +
    +-- Performing Test HAVE_POSIX_REGEX -- success
    +-- Performing Test HAVE_STEADY_CLOCK
    +CMake Warning at cmake/CXXFeatureCheck.cmake:43 (message):
    +  If you see build failures due to cross compilation, try setting
    +  HAVE_STEADY_CLOCK to 0
    +Call Stack (most recent call first):
    +  CMakeLists.txt:290 (cxx_feature_check)
    +
    +
    +-- Performing Test HAVE_STEADY_CLOCK -- success
    +-- Looking for C++ include pthread.h
    +-- Looking for C++ include pthread.h - found
    +-- Performing Test CMAKE_HAVE_LIBC_PTHREAD
    +-- Performing Test CMAKE_HAVE_LIBC_PTHREAD - Success
    +-- Found Threads: TRUE  
    +-- Configuring done
    +-- Generating done
    +-- Build files have been written to: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/src/project_googlebenchmark-build
    +[10/17] Performing build step for 'project_googlebenchmark'
    +[1/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark_name.cc.o
    +[2/22] Building CXX object src/CMakeFiles/benchmark_main.dir/benchmark_main.cc.o
    +[3/22] Building CXX object src/CMakeFiles/benchmark.dir/colorprint.cc.o
    +[4/22] Building CXX object src/CMakeFiles/benchmark.dir/sleep.cc.o
    +[5/22] Building CXX object src/CMakeFiles/benchmark.dir/perf_counters.cc.o
    +[6/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark_api_internal.cc.o
    +[7/22] Building CXX object src/CMakeFiles/benchmark.dir/commandlineflags.cc.o
    +[8/22] Building CXX object src/CMakeFiles/benchmark.dir/counter.cc.o
    +[9/22] Building CXX object src/CMakeFiles/benchmark.dir/string_util.cc.o
    +[10/22] Building CXX object src/CMakeFiles/benchmark.dir/timers.cc.o
    +[11/22] Building CXX object src/CMakeFiles/benchmark.dir/reporter.cc.o
    +[12/22] Building CXX object src/CMakeFiles/benchmark.dir/console_reporter.cc.o
    +[13/22] Building CXX object src/CMakeFiles/benchmark.dir/csv_reporter.cc.o
    +[14/22] Building CXX object src/CMakeFiles/benchmark.dir/json_reporter.cc.o
    +[15/22] Building CXX object src/CMakeFiles/benchmark.dir/complexity.cc.o
    +[16/22] Building CXX object src/CMakeFiles/benchmark.dir/statistics.cc.o
    +[17/22] Building CXX object src/CMakeFiles/benchmark.dir/sysinfo.cc.o
    +[18/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark_runner.cc.o
    +[19/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark.cc.o
    +[20/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark_register.cc.o
    +[21/22] Linking CXX static library src/libbenchmark.a
    +[22/22] Linking CXX static library src/libbenchmark_main.a
    +[11/17] Performing install step for 'project_googlebenchmark'
    +[0/1] Install the project...
    +-- Install configuration: "Release"
    +-- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/lib/libbenchmark.a
    +-- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/lib/libbenchmark_main.a
    +-- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/include/benchmark
    +-- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/include/benchmark/benchmark.h
    +-- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/lib/cmake/benchmark/benchmarkConfig.cmake
    +-- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/lib/cmake/benchmark/benchmarkConfigVersion.cmake
    +-- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/lib/pkgconfig/benchmark.pc
    +-- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/lib/cmake/benchmark/benchmarkTargets.cmake
    +-- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/lib/cmake/benchmark/benchmarkTargets-release.cmake
    +-- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/share/doc/benchmark
    +-- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/share/doc/benchmark/AssemblyTests.md
    +-- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/share/doc/benchmark/_config.yml
    +-- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/share/doc/benchmark/dependencies.md
    +-- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/share/doc/benchmark/index.md
    +-- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/share/doc/benchmark/perf_counters.md
    +-- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/share/doc/benchmark/platform_specific_build_instructions.md
    +-- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/share/doc/benchmark/random_interleaving.md
    +-- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/share/doc/benchmark/releasing.md
    +-- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/share/doc/benchmark/tools.md
    +-- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/share/doc/benchmark/user_guide.md
    +[12/17] No test step for 'project_googlebenchmark'
    +[13/17] Completed 'project_googlebenchmark'
    +[14/17] Building CXX object benchmarks/Vectorization/CMakeFiles/vectorization-matrix-benchmark.dir/Main.cpp.o
    +[15/17] Building CXX object benchmarks/Vectorization/CMakeFiles/vectorization-matrix-benchmark.dir/MLIRMatVecBenchmark.cpp.o
    +[16/17] Building CXX object benchmarks/Vectorization/CMakeFiles/vectorization-matrix-benchmark.dir/MLIRMatMulBenchmark.cpp.o
    +[17/17] Linking CXX executable bin/vectorization-matrix-benchmark
    +[Info] Running vectorization-matrix-benchmark...
    +2025-05-25T19:57:36+00:00
    +Running ./vectorization-matrix-benchmark
    +Run on (24 X 5100 MHz CPU s)
    +CPU Caches:
    +  L1 Data 48 KiB (x12)
    +  L1 Instruction 32 KiB (x12)
    +  L2 Unified 1280 KiB (x12)
    +  L3 Unified 30720 KiB (x1)
    +Load Average: 20.36, 14.10, 7.26
    +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
    +--------------------------------------------------------
    +Benchmark              Time             CPU   Iterations
    +--------------------------------------------------------
    +MLIR_MatMul/1       18.7 ns         18.7 ns     36846301
    +MLIR_MatVec/1       20.4 ns         20.4 ns     34734438
    +--------------------------------------------------------
    +MLIR_MatMul: MLIR MatMul Operation + Nested Loop
    +[ 18 18 18 18 18 18 18 18 18 18 ]
    +--------------------------------------------------------
    +MLIR_MatVec: MLIR MatVec Operation
    +[ 18 18 18 18 18 18 18 18 18 18 ]
    +
    \ No newline at end of file diff --git a/test_result/vectorization/vectorization_result.log b/test_result/vectorization/vectorization_result.log index e7431d95..bc6a3933 100755 --- a/test_result/vectorization/vectorization_result.log +++ b/test_result/vectorization/vectorization_result.log @@ -1,4 +1,4 @@ -Vectorization Benchmark - Sun May 25 19:53:00 UTC 2025 +Vectorization Benchmark - Sun May 25 19:57:30 UTC 2025 [Info] Starting vectorization-matrix-benchmark build... [Info] Running CMake configuration... -- The CXX compiler identification is GNU 11.4.0 @@ -128,21 +128,21 @@ Call Stack (most recent call first): -- Generating done -- Build files have been written to: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/src/project_googlebenchmark-build [10/17] Performing build step for 'project_googlebenchmark' -[1/22] Building CXX object src/CMakeFiles/benchmark.dir/colorprint.cc.o -[2/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark_name.cc.o -[3/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark_api_internal.cc.o -[4/22] Building CXX object src/CMakeFiles/benchmark_main.dir/benchmark_main.cc.o -[5/22] Building CXX object src/CMakeFiles/benchmark.dir/sleep.cc.o -[6/22] Building CXX object src/CMakeFiles/benchmark.dir/counter.cc.o -[7/22] Building CXX object src/CMakeFiles/benchmark.dir/timers.cc.o -[8/22] Building CXX object src/CMakeFiles/benchmark.dir/commandlineflags.cc.o -[9/22] Building CXX object src/CMakeFiles/benchmark.dir/perf_counters.cc.o -[10/22] Building CXX object src/CMakeFiles/benchmark.dir/reporter.cc.o -[11/22] Building CXX object src/CMakeFiles/benchmark.dir/csv_reporter.cc.o -[12/22] Building CXX object src/CMakeFiles/benchmark.dir/complexity.cc.o -[13/22] Building CXX object src/CMakeFiles/benchmark.dir/string_util.cc.o -[14/22] Building CXX object src/CMakeFiles/benchmark.dir/console_reporter.cc.o -[15/22] Building CXX object src/CMakeFiles/benchmark.dir/json_reporter.cc.o +[1/22] Building CXX 
object src/CMakeFiles/benchmark.dir/benchmark_name.cc.o +[2/22] Building CXX object src/CMakeFiles/benchmark_main.dir/benchmark_main.cc.o +[3/22] Building CXX object src/CMakeFiles/benchmark.dir/colorprint.cc.o +[4/22] Building CXX object src/CMakeFiles/benchmark.dir/sleep.cc.o +[5/22] Building CXX object src/CMakeFiles/benchmark.dir/perf_counters.cc.o +[6/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark_api_internal.cc.o +[7/22] Building CXX object src/CMakeFiles/benchmark.dir/commandlineflags.cc.o +[8/22] Building CXX object src/CMakeFiles/benchmark.dir/counter.cc.o +[9/22] Building CXX object src/CMakeFiles/benchmark.dir/string_util.cc.o +[10/22] Building CXX object src/CMakeFiles/benchmark.dir/timers.cc.o +[11/22] Building CXX object src/CMakeFiles/benchmark.dir/reporter.cc.o +[12/22] Building CXX object src/CMakeFiles/benchmark.dir/console_reporter.cc.o +[13/22] Building CXX object src/CMakeFiles/benchmark.dir/csv_reporter.cc.o +[14/22] Building CXX object src/CMakeFiles/benchmark.dir/json_reporter.cc.o +[15/22] Building CXX object src/CMakeFiles/benchmark.dir/complexity.cc.o [16/22] Building CXX object src/CMakeFiles/benchmark.dir/statistics.cc.o [17/22] Building CXX object src/CMakeFiles/benchmark.dir/sysinfo.cc.o [18/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark_runner.cc.o @@ -180,7 +180,7 @@ Call Stack (most recent call first): [16/17] Building CXX object benchmarks/Vectorization/CMakeFiles/vectorization-matrix-benchmark.dir/MLIRMatMulBenchmark.cpp.o [17/17] Linking CXX executable bin/vectorization-matrix-benchmark [Info] Running vectorization-matrix-benchmark... 
-2025-05-25T19:53:07+00:00 +2025-05-25T19:57:36+00:00 Running ./vectorization-matrix-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -188,13 +188,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 0.33, 0.10, 1.62 +Load Average: 20.36, 14.10, 7.26 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. -------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------- -MLIR_MatMul/1 18.7 ns 18.7 ns 37229964 -MLIR_MatVec/1 20.5 ns 20.5 ns 34308357 +MLIR_MatMul/1 18.7 ns 18.7 ns 36846301 +MLIR_MatVec/1 20.4 ns 20.4 ns 34734438 -------------------------------------------------------- MLIR_MatMul: MLIR MatMul Operation + Nested Loop [ 18 18 18 18 18 18 18 18 18 18 ] From 8d847a36084b37518200ce11ae0933309686ac5a Mon Sep 17 00:00:00 2001 From: Q Liu <52538137+LIUQyou@users.noreply.github.com> Date: Mon, 26 May 2025 17:00:17 +0200 Subject: [PATCH 12/52] ci: run benchmarks inside reusable docker container --- .github/workflows/bench.yml | 1 - .../build_results_crosscompile_summary.html | 2 +- site/deeplearning/build_results_summary.html | 2 +- site/deeplearning/dl-layer-ffn-benchmark.html | 2 +- .../dl-layer-rmsnorm-benchmark.html | 2 +- .../dl-layer-selfattention-benchmark.html | 2 +- .../dl-model-lenet-benchmark.html | 2 +- .../dl-model-mobilenetv3-benchmark.html | 2 +- .../dl-model-resnet18-benchmark.html | 2 +- .../dl-model-tinyllama-benchmark.html | 2 +- .../dl-model-whisper-benchmark.html | 2 +- .../dl-op-linalg-arithaddf-benchmark.html | 2 +- .../dl-op-linalg-arithdivf-benchmark.html | 2 +- .../dl-op-linalg-arithmulf-benchmark.html | 2 +- .../dl-op-linalg-arithnegf-benchmark.html | 2 +- .../dl-op-linalg-arithsubf-benchmark.html | 2 +- .../dl-op-linalg-batch-matmul-benchmark.html | 2 +- ...-op-linalg-conv2d-nchw-fchw-benchmark.html | 2 +- 
...-op-linalg-conv2d-nhwc-fhwc-benchmark.html | 2 +- ...-op-linalg-conv2d-nhwc-hwcf-benchmark.html | 2 +- ...-depthwise-conv-2d-nhwc-hwc-benchmark.html | 2 +- .../dl-op-linalg-mathexp-benchmark.html | 2 +- .../dl-op-linalg-mathfpow-benchmark.html | 2 +- .../dl-op-linalg-mathrsqrt-benchmark.html | 2 +- .../dl-op-linalg-matmul-benchmark.html | 2 +- ...-op-linalg-pooling-nhwc-sum-benchmark.html | 2 +- .../dl-op-linalg-reduceaddf-benchmark.html | 2 +- .../dl-op-linalg-reducemaxf-benchmark.html | 2 +- ...-linalg-softmax-exp-sum-div-benchmark.html | 2 +- .../dl-op-matmul-transpose-b-benchmark.html | 2 +- .../dl-op-tosa-transpose-benchmark.html | 2 +- site/deeplearning/run_results_summary.html | 2 +- site/geminiprocessing/build.html | 2 +- site/geminiprocessing/cmake_configure.html | 2 +- site/vectorization/vectorization_result.html | 48 +++++++++---------- .../vectorization/vectorization_result.log | 48 +++++++++---------- 36 files changed, 81 insertions(+), 82 deletions(-) diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml index ba1b0ea1..5e002603 100755 --- a/.github/workflows/bench.yml +++ b/.github/workflows/bench.yml @@ -61,4 +61,3 @@ jobs: outputs: page_url: ${{ steps.deploy.outputs.page_url }} - diff --git a/site/deeplearning/build_results_crosscompile_summary.html b/site/deeplearning/build_results_crosscompile_summary.html index d7779793..397ed451 100644 --- a/site/deeplearning/build_results_crosscompile_summary.html +++ b/site/deeplearning/build_results_crosscompile_summary.html @@ -1,4 +1,4 @@ -

    deeplearning/build_results_crosscompile_summary.log

    2025-05-26 14:27:29 UTC

    [Failed]  Build of 'dl-model-tinyllama-benchmark'
    +

    deeplearning/build_results_crosscompile_summary.log

    2025-05-26 14:41:51 UTC

    [Failed]  Build of 'dl-model-tinyllama-benchmark'
     [Failed]  Build of 'dl-model-mobilenetv3-benchmark'
     [Success] Build of 'dl-model-lenet-benchmark'
     [Failed]  Build of 'dl-model-bert-benchmark'
    diff --git a/site/deeplearning/build_results_summary.html b/site/deeplearning/build_results_summary.html
    index d3236314..ade5473d 100644
    --- a/site/deeplearning/build_results_summary.html
    +++ b/site/deeplearning/build_results_summary.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/build_results_summary.log

    2025-05-26 14:27:29 UTC

    [Success] Build of 'dl-model-tinyllama-benchmark'
    +

    deeplearning/build_results_summary.log

    2025-05-26 14:41:51 UTC

    [Success] Build of 'dl-model-tinyllama-benchmark'
     [Success] Build of 'dl-model-mobilenetv3-benchmark'
     [Success] Build of 'dl-model-lenet-benchmark'
     [Failed]  Build of 'dl-model-bert-benchmark'
    diff --git a/site/deeplearning/dl-layer-ffn-benchmark.html b/site/deeplearning/dl-layer-ffn-benchmark.html
    index 7d027ff7..92106174 100644
    --- a/site/deeplearning/dl-layer-ffn-benchmark.html
    +++ b/site/deeplearning/dl-layer-ffn-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-layer-ffn-benchmark.log

    2025-05-26 14:27:29 UTC

    2025-05-25T16:33:30+00:00
    +

    deeplearning/dl-layer-ffn-benchmark.log

    2025-05-26 14:41:51 UTC

    2025-05-25T16:33:30+00:00
     Running ./dl-layer-ffn-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-layer-rmsnorm-benchmark.html b/site/deeplearning/dl-layer-rmsnorm-benchmark.html
    index cf717809..78e380e6 100644
    --- a/site/deeplearning/dl-layer-rmsnorm-benchmark.html
    +++ b/site/deeplearning/dl-layer-rmsnorm-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-layer-rmsnorm-benchmark.log

    2025-05-26 14:27:29 UTC

    2025-05-25T16:33:34+00:00
    +

    deeplearning/dl-layer-rmsnorm-benchmark.log

    2025-05-26 14:41:51 UTC

    2025-05-25T16:33:34+00:00
     Running ./dl-layer-rmsnorm-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-layer-selfattention-benchmark.html b/site/deeplearning/dl-layer-selfattention-benchmark.html
    index 48d5e9be..a58e8504 100644
    --- a/site/deeplearning/dl-layer-selfattention-benchmark.html
    +++ b/site/deeplearning/dl-layer-selfattention-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-layer-selfattention-benchmark.log

    2025-05-26 14:27:29 UTC

    2025-05-25T16:33:32+00:00
    +

    deeplearning/dl-layer-selfattention-benchmark.log

    2025-05-26 14:41:51 UTC

    2025-05-25T16:33:32+00:00
     Running ./dl-layer-selfattention-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-model-lenet-benchmark.html b/site/deeplearning/dl-model-lenet-benchmark.html
    index 9011db4e..beb21b43 100644
    --- a/site/deeplearning/dl-model-lenet-benchmark.html
    +++ b/site/deeplearning/dl-model-lenet-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-model-lenet-benchmark.log

    2025-05-26 14:27:29 UTC

    2025-05-25T16:29:36+00:00
    +

    deeplearning/dl-model-lenet-benchmark.log

    2025-05-26 14:41:51 UTC

    2025-05-25T16:29:36+00:00
     Running ./dl-model-lenet-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-model-mobilenetv3-benchmark.html b/site/deeplearning/dl-model-mobilenetv3-benchmark.html
    index 7109b91f..11e9b8ff 100644
    --- a/site/deeplearning/dl-model-mobilenetv3-benchmark.html
    +++ b/site/deeplearning/dl-model-mobilenetv3-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-model-mobilenetv3-benchmark.log

    2025-05-26 14:27:29 UTC

    2025-05-25T16:29:34+00:00
    +

    deeplearning/dl-model-mobilenetv3-benchmark.log

    2025-05-26 14:41:51 UTC

    2025-05-25T16:29:34+00:00
     Running ./dl-model-mobilenetv3-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-model-resnet18-benchmark.html b/site/deeplearning/dl-model-resnet18-benchmark.html
    index 194a6cc9..e81efdab 100644
    --- a/site/deeplearning/dl-model-resnet18-benchmark.html
    +++ b/site/deeplearning/dl-model-resnet18-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-model-resnet18-benchmark.log

    2025-05-26 14:27:29 UTC

    2025-05-25T16:33:27+00:00
    +

    deeplearning/dl-model-resnet18-benchmark.log

    2025-05-26 14:41:51 UTC

    2025-05-25T16:33:27+00:00
     Running ./dl-model-resnet18-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-model-tinyllama-benchmark.html b/site/deeplearning/dl-model-tinyllama-benchmark.html
    index 8e18ec1d..a70ce485 100644
    --- a/site/deeplearning/dl-model-tinyllama-benchmark.html
    +++ b/site/deeplearning/dl-model-tinyllama-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-model-tinyllama-benchmark.log

    2025-05-26 14:27:29 UTC

    2025-05-25T16:24:01+00:00
    +

    deeplearning/dl-model-tinyllama-benchmark.log

    2025-05-26 14:41:51 UTC

    2025-05-25T16:24:01+00:00
     Running ./dl-model-tinyllama-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-model-whisper-benchmark.html b/site/deeplearning/dl-model-whisper-benchmark.html
    index 75b3b90d..2474fed6 100644
    --- a/site/deeplearning/dl-model-whisper-benchmark.html
    +++ b/site/deeplearning/dl-model-whisper-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-model-whisper-benchmark.log

    2025-05-26 14:27:29 UTC

    2025-05-25T16:29:38+00:00
    +

    deeplearning/dl-model-whisper-benchmark.log

    2025-05-26 14:41:51 UTC

    2025-05-25T16:29:38+00:00
     Running ./dl-model-whisper-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html b/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html
    index cb45e65e..6e70ddfc 100644
    --- a/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html
    +++ b/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-op-linalg-arithaddf-benchmark.log

    2025-05-26 14:27:29 UTC

    2025-05-25T16:34:03+00:00
    +

    deeplearning/dl-op-linalg-arithaddf-benchmark.log

    2025-05-26 14:41:51 UTC

    2025-05-25T16:34:03+00:00
     Running ./dl-op-linalg-arithaddf-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html b/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html
    index 0e383307..9027b786 100644
    --- a/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html
    +++ b/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-op-linalg-arithdivf-benchmark.log

    2025-05-26 14:27:29 UTC

    2025-05-25T16:34:05+00:00
    +

    deeplearning/dl-op-linalg-arithdivf-benchmark.log

    2025-05-26 14:41:51 UTC

    2025-05-25T16:34:05+00:00
     Running ./dl-op-linalg-arithdivf-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html b/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html
    index 9f5648c2..69e75449 100644
    --- a/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html
    +++ b/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-op-linalg-arithmulf-benchmark.log

    2025-05-26 14:27:29 UTC

    2025-05-25T16:34:07+00:00
    +

    deeplearning/dl-op-linalg-arithmulf-benchmark.log

    2025-05-26 14:41:51 UTC

    2025-05-25T16:34:07+00:00
     Running ./dl-op-linalg-arithmulf-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html b/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html
    index 17661c41..9f1b6a84 100644
    --- a/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html
    +++ b/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-op-linalg-arithnegf-benchmark.log

    2025-05-26 14:27:29 UTC

    2025-05-25T16:34:09+00:00
    +

    deeplearning/dl-op-linalg-arithnegf-benchmark.log

    2025-05-26 14:41:51 UTC

    2025-05-25T16:34:09+00:00
     Running ./dl-op-linalg-arithnegf-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html b/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html
    index c8ce28ec..0f39fb7f 100644
    --- a/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html
    +++ b/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-op-linalg-arithsubf-benchmark.log

    2025-05-26 14:27:29 UTC

    2025-05-25T16:34:11+00:00
    +

    deeplearning/dl-op-linalg-arithsubf-benchmark.log

    2025-05-26 14:41:51 UTC

    2025-05-25T16:34:11+00:00
     Running ./dl-op-linalg-arithsubf-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html b/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html
    index d880d8ee..f2b2030a 100644
    --- a/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html
    +++ b/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-op-linalg-batch-matmul-benchmark.log

    2025-05-26 14:27:29 UTC

    2025-05-25T16:33:53+00:00
    +

    deeplearning/dl-op-linalg-batch-matmul-benchmark.log

    2025-05-26 14:41:51 UTC

    2025-05-25T16:33:53+00:00
     Running ./dl-op-linalg-batch-matmul-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html b/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html
    index 2b37cbba..db3aa725 100644
    --- a/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html
    +++ b/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.log

    2025-05-26 14:27:29 UTC

    2025-05-25T16:33:46+00:00
    +

    deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.log

    2025-05-26 14:41:51 UTC

    2025-05-25T16:33:46+00:00
     Running ./dl-op-linalg-conv2d-nchw-fchw-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html b/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html
    index 37d8f93e..52c50095 100644
    --- a/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html
    +++ b/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.log

    2025-05-26 14:27:29 UTC

    2025-05-25T16:33:50+00:00
    +

    deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.log

    2025-05-26 14:41:51 UTC

    2025-05-25T16:33:50+00:00
     Running ./dl-op-linalg-conv2d-nhwc-fhwc-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html b/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html
    index 490e872c..eda0844e 100644
    --- a/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html
    +++ b/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.log

    2025-05-26 14:27:29 UTC

    2025-05-25T16:33:48+00:00
    +

    deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.log

    2025-05-26 14:41:51 UTC

    2025-05-25T16:33:48+00:00
     Running ./dl-op-linalg-conv2d-nhwc-hwcf-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html b/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html
    index ce8b1d43..b7bf5bf8 100644
    --- a/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html
    +++ b/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.log

    2025-05-26 14:27:29 UTC

    2025-05-25T16:33:50+00:00
    +

    deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.log

    2025-05-26 14:41:51 UTC

    2025-05-25T16:33:50+00:00
     Running ./dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-op-linalg-mathexp-benchmark.html b/site/deeplearning/dl-op-linalg-mathexp-benchmark.html
    index f6048848..e8fa1ce1 100644
    --- a/site/deeplearning/dl-op-linalg-mathexp-benchmark.html
    +++ b/site/deeplearning/dl-op-linalg-mathexp-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-op-linalg-mathexp-benchmark.log

    2025-05-26 14:27:29 UTC

    2025-05-25T16:34:17+00:00
    +

    deeplearning/dl-op-linalg-mathexp-benchmark.log

    2025-05-26 14:41:51 UTC

    2025-05-25T16:34:17+00:00
     Running ./dl-op-linalg-mathexp-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html b/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html
    index 0d29ac56..2cb46dc5 100644
    --- a/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html
    +++ b/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-op-linalg-mathfpow-benchmark.log

    2025-05-26 14:27:29 UTC

    2025-05-25T16:34:13+00:00
    +

    deeplearning/dl-op-linalg-mathfpow-benchmark.log

    2025-05-26 14:41:51 UTC

    2025-05-25T16:34:13+00:00
     Running ./dl-op-linalg-mathfpow-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html b/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html
    index 42e2576f..5dd5a0a3 100644
    --- a/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html
    +++ b/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-op-linalg-mathrsqrt-benchmark.log

    2025-05-26 14:27:29 UTC

    2025-05-25T16:34:15+00:00
    +

    deeplearning/dl-op-linalg-mathrsqrt-benchmark.log

    2025-05-26 14:41:51 UTC

    2025-05-25T16:34:15+00:00
     Running ./dl-op-linalg-mathrsqrt-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-op-linalg-matmul-benchmark.html b/site/deeplearning/dl-op-linalg-matmul-benchmark.html
    index 7eaed5f5..03740fa6 100644
    --- a/site/deeplearning/dl-op-linalg-matmul-benchmark.html
    +++ b/site/deeplearning/dl-op-linalg-matmul-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-op-linalg-matmul-benchmark.log

    2025-05-26 14:27:29 UTC

    2025-05-25T16:33:36+00:00
    +

    deeplearning/dl-op-linalg-matmul-benchmark.log

    2025-05-26 14:41:51 UTC

    2025-05-25T16:33:36+00:00
     Running ./dl-op-linalg-matmul-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html b/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html
    index ae62ac2c..dc894af4 100644
    --- a/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html
    +++ b/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.log

    2025-05-26 14:27:29 UTC

    2025-05-25T16:33:51+00:00
    +

    deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.log

    2025-05-26 14:41:51 UTC

    2025-05-25T16:33:51+00:00
     Running ./dl-op-linalg-pooling-nhwc-sum-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html b/site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html
    index 22b31c55..e18f7b66 100644
    --- a/site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html
    +++ b/site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-op-linalg-reduceaddf-benchmark.log

    2025-05-26 14:27:29 UTC

    2025-05-25T16:34:19+00:00
    +

    deeplearning/dl-op-linalg-reduceaddf-benchmark.log

    2025-05-26 14:41:51 UTC

    2025-05-25T16:34:19+00:00
     Running ./dl-op-linalg-reduceaddf-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html b/site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html
    index 22a3fa6b..967033fc 100644
    --- a/site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html
    +++ b/site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-op-linalg-reducemaxf-benchmark.log

    2025-05-26 14:27:29 UTC

    2025-05-25T16:34:19+00:00
    +

    deeplearning/dl-op-linalg-reducemaxf-benchmark.log

    2025-05-26 14:41:51 UTC

    2025-05-25T16:34:19+00:00
     Running ./dl-op-linalg-reducemaxf-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html b/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html
    index cfbec49b..dd0bc482 100644
    --- a/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html
    +++ b/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.log

    2025-05-26 14:27:29 UTC

    2025-05-25T16:34:19+00:00
    +

    deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.log

    2025-05-26 14:41:51 UTC

    2025-05-25T16:34:19+00:00
     Running ./dl-op-linalg-softmax-exp-sum-div-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html b/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html
    index c673eb82..93028439 100644
    --- a/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html
    +++ b/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-op-matmul-transpose-b-benchmark.log

    2025-05-26 14:27:29 UTC

    2025-05-25T16:34:22+00:00
    +

    deeplearning/dl-op-matmul-transpose-b-benchmark.log

    2025-05-26 14:41:51 UTC

    2025-05-25T16:34:22+00:00
     Running ./dl-op-matmul-transpose-b-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-op-tosa-transpose-benchmark.html b/site/deeplearning/dl-op-tosa-transpose-benchmark.html
    index 56ac1ea7..33e7a1de 100644
    --- a/site/deeplearning/dl-op-tosa-transpose-benchmark.html
    +++ b/site/deeplearning/dl-op-tosa-transpose-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-op-tosa-transpose-benchmark.log

    2025-05-26 14:27:29 UTC

    2025-05-25T16:34:22+00:00
    +

    deeplearning/dl-op-tosa-transpose-benchmark.log

    2025-05-26 14:41:51 UTC

    2025-05-25T16:34:22+00:00
     Running ./dl-op-tosa-transpose-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/run_results_summary.html b/site/deeplearning/run_results_summary.html
    index 687db649..87f85ce5 100644
    --- a/site/deeplearning/run_results_summary.html
    +++ b/site/deeplearning/run_results_summary.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/run_results_summary.log

    2025-05-26 14:27:29 UTC

    [Success] Run of 'dl-model-tinyllama-benchmark'
    +

    deeplearning/run_results_summary.log

    2025-05-26 14:41:51 UTC

    [Success] Run of 'dl-model-tinyllama-benchmark'
     [Success] Run of 'dl-model-mobilenetv3-benchmark'
     [Success] Run of 'dl-model-lenet-benchmark'
     [Missing] Executable not found for 'dl-model-bert-benchmark'
    diff --git a/site/geminiprocessing/build.html b/site/geminiprocessing/build.html
    index 742b1602..50a394fb 100644
    --- a/site/geminiprocessing/build.html
    +++ b/site/geminiprocessing/build.html
    @@ -1,4 +1,4 @@
    -

    geminiprocessing/build.log

    2025-05-26 14:27:29 UTC

    [1/21] Creating directories for 'project_googlebenchmark'
    +

    geminiprocessing/build.log

    2025-05-26 14:41:51 UTC

    [1/21] Creating directories for 'project_googlebenchmark'
     [2/21] Building C object benchmarks/Gemmini/Ops/MatMulOp/CMakeFiles/ExoMatMul.dir/ExoMatmul.c.o
     FAILED: benchmarks/Gemmini/Ops/MatMulOp/CMakeFiles/ExoMatMul.dir/ExoMatmul.c.o 
     riscv64-unknown-linux-gnu-gcc  -I/home/buddy-complier-workspace/buddy-mlir/build/cmake/../../frontend/Interfaces -I/home/buddy-complier-workspace/buddy-mlir/build/cmake/../../thirdparty/include -I/home/buddy-complier-workspace/buddy-benchmark/benchmarks -I/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include -I/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/.. -I/home/xychen/buddy-mlir/frontend/Interfaces -O3 -DNDEBUG -MD -MT benchmarks/Gemmini/Ops/MatMulOp/CMakeFiles/ExoMatMul.dir/ExoMatmul.c.o -MF benchmarks/Gemmini/Ops/MatMulOp/CMakeFiles/ExoMatMul.dir/ExoMatmul.c.o.d -o benchmarks/Gemmini/Ops/MatMulOp/CMakeFiles/ExoMatMul.dir/ExoMatmul.c.o -c /home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c
    diff --git a/site/geminiprocessing/cmake_configure.html b/site/geminiprocessing/cmake_configure.html
    index 4c7b91ea..fdc02bbb 100644
    --- a/site/geminiprocessing/cmake_configure.html
    +++ b/site/geminiprocessing/cmake_configure.html
    @@ -1,4 +1,4 @@
    -

    geminiprocessing/cmake_configure.log

    2025-05-26 14:27:29 UTC

    -- The CXX compiler identification is GNU 9.2.0
    +

    geminiprocessing/cmake_configure.log

    2025-05-26 14:41:51 UTC

    -- The CXX compiler identification is GNU 9.2.0
     -- The C compiler identification is GNU 9.2.0
     -- Detecting CXX compiler ABI info
     -- Detecting CXX compiler ABI info - done
    diff --git a/site/vectorization/vectorization_result.html b/site/vectorization/vectorization_result.html
    index eef7bfda..6dd54a0a 100644
    --- a/site/vectorization/vectorization_result.html
    +++ b/site/vectorization/vectorization_result.html
    @@ -1,4 +1,4 @@
    -

    vectorization/vectorization_result.log

    2025-05-26 14:27:29 UTC

    Vectorization Benchmark - Sun May 25 19:57:30 UTC 2025
    +

    vectorization/vectorization_result.log

    2025-05-26 14:41:51 UTC

    Vectorization Benchmark - Mon May 26 14:41:41 UTC 2025
     [Info] Starting vectorization-matrix-benchmark build...
     [Info] Running CMake configuration...
     -- The CXX compiler identification is GNU 11.4.0
    @@ -39,8 +39,8 @@ 

    vectorization/vectorization_result.log

    2025-05-26 14:27:29 UTC

    vectorization/vectorization_result.log

    2025-05-26 14:27:29 UTC

    vectorization/vectorization_result.log

    2025-05-26 14:27:29 UTC

    vectorization/vectorization_result.log

    2025-05-26 14:27:29 UTC

    Date: Mon, 26 May 2025 21:04:20 +0200 Subject: [PATCH 13/52] web uploading --- scripts/logs2html.py | 28 ++++++++--- .../build_results_crosscompile_summary.html | 2 +- site/deeplearning/build_results_summary.html | 2 +- site/deeplearning/dl-layer-ffn-benchmark.html | 2 +- .../dl-layer-rmsnorm-benchmark.html | 2 +- .../dl-layer-selfattention-benchmark.html | 2 +- .../dl-model-lenet-benchmark.html | 2 +- .../dl-model-mobilenetv3-benchmark.html | 2 +- .../dl-model-resnet18-benchmark.html | 2 +- .../dl-model-tinyllama-benchmark.html | 2 +- .../dl-model-whisper-benchmark.html | 2 +- .../dl-op-linalg-arithaddf-benchmark.html | 2 +- .../dl-op-linalg-arithdivf-benchmark.html | 2 +- .../dl-op-linalg-arithmulf-benchmark.html | 2 +- .../dl-op-linalg-arithnegf-benchmark.html | 2 +- .../dl-op-linalg-arithsubf-benchmark.html | 2 +- .../dl-op-linalg-batch-matmul-benchmark.html | 2 +- ...-op-linalg-conv2d-nchw-fchw-benchmark.html | 2 +- ...-op-linalg-conv2d-nhwc-fhwc-benchmark.html | 2 +- ...-op-linalg-conv2d-nhwc-hwcf-benchmark.html | 2 +- ...-depthwise-conv-2d-nhwc-hwc-benchmark.html | 2 +- .../dl-op-linalg-mathexp-benchmark.html | 2 +- .../dl-op-linalg-mathfpow-benchmark.html | 2 +- .../dl-op-linalg-mathrsqrt-benchmark.html | 2 +- .../dl-op-linalg-matmul-benchmark.html | 2 +- ...-op-linalg-pooling-nhwc-sum-benchmark.html | 2 +- .../dl-op-linalg-reduceaddf-benchmark.html | 2 +- .../dl-op-linalg-reducemaxf-benchmark.html | 2 +- ...-linalg-softmax-exp-sum-div-benchmark.html | 2 +- .../dl-op-matmul-transpose-b-benchmark.html | 2 +- .../dl-op-tosa-transpose-benchmark.html | 2 +- site/deeplearning/run_results_summary.html | 2 +- site/geminiprocessing/build.html | 2 +- site/geminiprocessing/cmake_configure.html | 2 +- site/vectorization/vectorization_result.html | 48 +++++++++---------- .../vectorization/vectorization_result.log | 48 +++++++++---------- 36 files changed, 103 insertions(+), 87 deletions(-) diff --git a/scripts/logs2html.py b/scripts/logs2html.py index 
f96eab99..26f423bf 100755 --- a/scripts/logs2html.py +++ b/scripts/logs2html.py @@ -1,17 +1,33 @@ #!/usr/bin/env python3 -"""Turn every *.log under into /.html + an index.html.""" +"""Convert every *.log under into HTML inside and build an index.""" + import html, pathlib, datetime, sys src, dst = map(pathlib.Path, sys.argv[1:3]) dst.mkdir(parents=True, exist_ok=True) -stamp = datetime.datetime.utcnow().isoformat(' ', 'seconds') +stamp = datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S UTC") +# --------------------------------------------------------------------------- +# 1) turn each *.log into an HTML page at the same relative location +# --------------------------------------------------------------------------- for log in src.rglob("*.log"): rel = log.relative_to(src) page = dst / rel.with_suffix(".html") page.parent.mkdir(parents=True, exist_ok=True) - page.write_text(f"

    {rel}

    {stamp} UTC

    {html.escape(log.read_text())}
    ") + page.write_text( + f"

    {rel}

    {stamp}

    {html.escape(log.read_text())}
    " + ) + +# --------------------------------------------------------------------------- +# 2) build index.html with links **relative to `dst`** +# --------------------------------------------------------------------------- +links = "\n".join( + f'
  • ' + f'{p.relative_to(dst).as_posix()}
  • ' + for p in sorted(dst.rglob("*.html")) + if p.name != "index.html" +) -links = "\n".join(f'
  • {p.as_posix()}
  • ' - for p in sorted(dst.rglob("*.html")) if p.name != "index.html") -(dst / "index.html").write_text(f"

    Buddy-Benchmark results

      {links}
    ") +(dst / "index.html").write_text( + f"

    Buddy-Benchmark results

      \n{links}\n
    " +) diff --git a/site/deeplearning/build_results_crosscompile_summary.html b/site/deeplearning/build_results_crosscompile_summary.html index 397ed451..fb9230a8 100644 --- a/site/deeplearning/build_results_crosscompile_summary.html +++ b/site/deeplearning/build_results_crosscompile_summary.html @@ -1,4 +1,4 @@ -

    deeplearning/build_results_crosscompile_summary.log

    2025-05-26 14:41:51 UTC

    [Failed]  Build of 'dl-model-tinyllama-benchmark'
    +

    deeplearning/build_results_crosscompile_summary.log

    2025-05-26 15:04:56 UTC

    [Failed]  Build of 'dl-model-tinyllama-benchmark'
     [Failed]  Build of 'dl-model-mobilenetv3-benchmark'
     [Success] Build of 'dl-model-lenet-benchmark'
     [Failed]  Build of 'dl-model-bert-benchmark'
    diff --git a/site/deeplearning/build_results_summary.html b/site/deeplearning/build_results_summary.html
    index ade5473d..358cd47d 100644
    --- a/site/deeplearning/build_results_summary.html
    +++ b/site/deeplearning/build_results_summary.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/build_results_summary.log

    2025-05-26 14:41:51 UTC

    [Success] Build of 'dl-model-tinyllama-benchmark'
    +

    deeplearning/build_results_summary.log

    2025-05-26 15:04:56 UTC

    [Success] Build of 'dl-model-tinyllama-benchmark'
     [Success] Build of 'dl-model-mobilenetv3-benchmark'
     [Success] Build of 'dl-model-lenet-benchmark'
     [Failed]  Build of 'dl-model-bert-benchmark'
    diff --git a/site/deeplearning/dl-layer-ffn-benchmark.html b/site/deeplearning/dl-layer-ffn-benchmark.html
    index 92106174..ff96b640 100644
    --- a/site/deeplearning/dl-layer-ffn-benchmark.html
    +++ b/site/deeplearning/dl-layer-ffn-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-layer-ffn-benchmark.log

    2025-05-26 14:41:51 UTC

    2025-05-25T16:33:30+00:00
    +

    deeplearning/dl-layer-ffn-benchmark.log

    2025-05-26 15:04:56 UTC

    2025-05-25T16:33:30+00:00
     Running ./dl-layer-ffn-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-layer-rmsnorm-benchmark.html b/site/deeplearning/dl-layer-rmsnorm-benchmark.html
    index 78e380e6..7a5aa827 100644
    --- a/site/deeplearning/dl-layer-rmsnorm-benchmark.html
    +++ b/site/deeplearning/dl-layer-rmsnorm-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-layer-rmsnorm-benchmark.log

    2025-05-26 14:41:51 UTC

    2025-05-25T16:33:34+00:00
    +

    deeplearning/dl-layer-rmsnorm-benchmark.log

    2025-05-26 15:04:56 UTC

    2025-05-25T16:33:34+00:00
     Running ./dl-layer-rmsnorm-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-layer-selfattention-benchmark.html b/site/deeplearning/dl-layer-selfattention-benchmark.html
    index a58e8504..373f4fd5 100644
    --- a/site/deeplearning/dl-layer-selfattention-benchmark.html
    +++ b/site/deeplearning/dl-layer-selfattention-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-layer-selfattention-benchmark.log

    2025-05-26 14:41:51 UTC

    2025-05-25T16:33:32+00:00
    +

    deeplearning/dl-layer-selfattention-benchmark.log

    2025-05-26 15:04:56 UTC

    2025-05-25T16:33:32+00:00
     Running ./dl-layer-selfattention-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-model-lenet-benchmark.html b/site/deeplearning/dl-model-lenet-benchmark.html
    index beb21b43..54a60e30 100644
    --- a/site/deeplearning/dl-model-lenet-benchmark.html
    +++ b/site/deeplearning/dl-model-lenet-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-model-lenet-benchmark.log

    2025-05-26 14:41:51 UTC

    2025-05-25T16:29:36+00:00
    +

    deeplearning/dl-model-lenet-benchmark.log

    2025-05-26 15:04:56 UTC

    2025-05-25T16:29:36+00:00
     Running ./dl-model-lenet-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-model-mobilenetv3-benchmark.html b/site/deeplearning/dl-model-mobilenetv3-benchmark.html
    index 11e9b8ff..19f7c5ec 100644
    --- a/site/deeplearning/dl-model-mobilenetv3-benchmark.html
    +++ b/site/deeplearning/dl-model-mobilenetv3-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-model-mobilenetv3-benchmark.log

    2025-05-26 14:41:51 UTC

    2025-05-25T16:29:34+00:00
    +

    deeplearning/dl-model-mobilenetv3-benchmark.log

    2025-05-26 15:04:56 UTC

    2025-05-25T16:29:34+00:00
     Running ./dl-model-mobilenetv3-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-model-resnet18-benchmark.html b/site/deeplearning/dl-model-resnet18-benchmark.html
    index e81efdab..dfe83825 100644
    --- a/site/deeplearning/dl-model-resnet18-benchmark.html
    +++ b/site/deeplearning/dl-model-resnet18-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-model-resnet18-benchmark.log

    2025-05-26 14:41:51 UTC

    2025-05-25T16:33:27+00:00
    +

    deeplearning/dl-model-resnet18-benchmark.log

    2025-05-26 15:04:56 UTC

    2025-05-25T16:33:27+00:00
     Running ./dl-model-resnet18-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-model-tinyllama-benchmark.html b/site/deeplearning/dl-model-tinyllama-benchmark.html
    index a70ce485..d24b04f9 100644
    --- a/site/deeplearning/dl-model-tinyllama-benchmark.html
    +++ b/site/deeplearning/dl-model-tinyllama-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-model-tinyllama-benchmark.log

    2025-05-26 14:41:51 UTC

    2025-05-25T16:24:01+00:00
    +

    deeplearning/dl-model-tinyllama-benchmark.log

    2025-05-26 15:04:56 UTC

    2025-05-25T16:24:01+00:00
     Running ./dl-model-tinyllama-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-model-whisper-benchmark.html b/site/deeplearning/dl-model-whisper-benchmark.html
    index 2474fed6..6dc1c8ca 100644
    --- a/site/deeplearning/dl-model-whisper-benchmark.html
    +++ b/site/deeplearning/dl-model-whisper-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-model-whisper-benchmark.log

    2025-05-26 14:41:51 UTC

    2025-05-25T16:29:38+00:00
    +

    deeplearning/dl-model-whisper-benchmark.log

    2025-05-26 15:04:56 UTC

    2025-05-25T16:29:38+00:00
     Running ./dl-model-whisper-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html b/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html
    index 6e70ddfc..dd5ed09d 100644
    --- a/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html
    +++ b/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-op-linalg-arithaddf-benchmark.log

    2025-05-26 14:41:51 UTC

    2025-05-25T16:34:03+00:00
    +

    deeplearning/dl-op-linalg-arithaddf-benchmark.log

    2025-05-26 15:04:56 UTC

    2025-05-25T16:34:03+00:00
     Running ./dl-op-linalg-arithaddf-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html b/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html
    index 9027b786..724af897 100644
    --- a/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html
    +++ b/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-op-linalg-arithdivf-benchmark.log

    2025-05-26 14:41:51 UTC

    2025-05-25T16:34:05+00:00
    +

    deeplearning/dl-op-linalg-arithdivf-benchmark.log

    2025-05-26 15:04:56 UTC

    2025-05-25T16:34:05+00:00
     Running ./dl-op-linalg-arithdivf-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html b/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html
    index 69e75449..ab5f539b 100644
    --- a/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html
    +++ b/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-op-linalg-arithmulf-benchmark.log

    2025-05-26 14:41:51 UTC

    2025-05-25T16:34:07+00:00
    +

    deeplearning/dl-op-linalg-arithmulf-benchmark.log

    2025-05-26 15:04:56 UTC

    2025-05-25T16:34:07+00:00
     Running ./dl-op-linalg-arithmulf-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html b/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html
    index 9f1b6a84..196fc7d3 100644
    --- a/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html
    +++ b/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-op-linalg-arithnegf-benchmark.log

    2025-05-26 14:41:51 UTC

    2025-05-25T16:34:09+00:00
    +

    deeplearning/dl-op-linalg-arithnegf-benchmark.log

    2025-05-26 15:04:56 UTC

    2025-05-25T16:34:09+00:00
     Running ./dl-op-linalg-arithnegf-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html b/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html
    index 0f39fb7f..b6d8e091 100644
    --- a/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html
    +++ b/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-op-linalg-arithsubf-benchmark.log

    2025-05-26 14:41:51 UTC

    2025-05-25T16:34:11+00:00
    +

    deeplearning/dl-op-linalg-arithsubf-benchmark.log

    2025-05-26 15:04:56 UTC

    2025-05-25T16:34:11+00:00
     Running ./dl-op-linalg-arithsubf-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html b/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html
    index f2b2030a..df91d597 100644
    --- a/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html
    +++ b/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-op-linalg-batch-matmul-benchmark.log

    2025-05-26 14:41:51 UTC

    2025-05-25T16:33:53+00:00
    +

    deeplearning/dl-op-linalg-batch-matmul-benchmark.log

    2025-05-26 15:04:56 UTC

    2025-05-25T16:33:53+00:00
     Running ./dl-op-linalg-batch-matmul-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html b/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html
    index db3aa725..470eddc0 100644
    --- a/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html
    +++ b/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.log

    2025-05-26 14:41:51 UTC

    2025-05-25T16:33:46+00:00
    +

    deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.log

    2025-05-26 15:04:56 UTC

    2025-05-25T16:33:46+00:00
     Running ./dl-op-linalg-conv2d-nchw-fchw-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html b/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html
    index 52c50095..aedb3e11 100644
    --- a/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html
    +++ b/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.log

    2025-05-26 14:41:51 UTC

    2025-05-25T16:33:50+00:00
    +

    deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.log

    2025-05-26 15:04:56 UTC

    2025-05-25T16:33:50+00:00
     Running ./dl-op-linalg-conv2d-nhwc-fhwc-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html b/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html
    index eda0844e..470c07c3 100644
    --- a/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html
    +++ b/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.log

    2025-05-26 14:41:51 UTC

    2025-05-25T16:33:48+00:00
    +

    deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.log

    2025-05-26 15:04:56 UTC

    2025-05-25T16:33:48+00:00
     Running ./dl-op-linalg-conv2d-nhwc-hwcf-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html b/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html
    index b7bf5bf8..f8ddea55 100644
    --- a/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html
    +++ b/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.log

    2025-05-26 14:41:51 UTC

    2025-05-25T16:33:50+00:00
    +

    deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.log

    2025-05-26 15:04:56 UTC

    2025-05-25T16:33:50+00:00
     Running ./dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-op-linalg-mathexp-benchmark.html b/site/deeplearning/dl-op-linalg-mathexp-benchmark.html
    index e8fa1ce1..031f97ea 100644
    --- a/site/deeplearning/dl-op-linalg-mathexp-benchmark.html
    +++ b/site/deeplearning/dl-op-linalg-mathexp-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-op-linalg-mathexp-benchmark.log

    2025-05-26 14:41:51 UTC

    2025-05-25T16:34:17+00:00
    +

    deeplearning/dl-op-linalg-mathexp-benchmark.log

    2025-05-26 15:04:56 UTC

    2025-05-25T16:34:17+00:00
     Running ./dl-op-linalg-mathexp-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html b/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html
    index 2cb46dc5..50282df9 100644
    --- a/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html
    +++ b/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-op-linalg-mathfpow-benchmark.log

    2025-05-26 14:41:51 UTC

    2025-05-25T16:34:13+00:00
    +

    deeplearning/dl-op-linalg-mathfpow-benchmark.log

    2025-05-26 15:04:56 UTC

    2025-05-25T16:34:13+00:00
     Running ./dl-op-linalg-mathfpow-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html b/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html
    index 5dd5a0a3..2b883a58 100644
    --- a/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html
    +++ b/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-op-linalg-mathrsqrt-benchmark.log

    2025-05-26 14:41:51 UTC

    2025-05-25T16:34:15+00:00
    +

    deeplearning/dl-op-linalg-mathrsqrt-benchmark.log

    2025-05-26 15:04:56 UTC

    2025-05-25T16:34:15+00:00
     Running ./dl-op-linalg-mathrsqrt-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-op-linalg-matmul-benchmark.html b/site/deeplearning/dl-op-linalg-matmul-benchmark.html
    index 03740fa6..204a97cb 100644
    --- a/site/deeplearning/dl-op-linalg-matmul-benchmark.html
    +++ b/site/deeplearning/dl-op-linalg-matmul-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-op-linalg-matmul-benchmark.log

    2025-05-26 14:41:51 UTC

    2025-05-25T16:33:36+00:00
    +

    deeplearning/dl-op-linalg-matmul-benchmark.log

    2025-05-26 15:04:56 UTC

    2025-05-25T16:33:36+00:00
     Running ./dl-op-linalg-matmul-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html b/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html
    index dc894af4..59ad95b4 100644
    --- a/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html
    +++ b/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.log

    2025-05-26 14:41:51 UTC

    2025-05-25T16:33:51+00:00
    +

    deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.log

    2025-05-26 15:04:56 UTC

    2025-05-25T16:33:51+00:00
     Running ./dl-op-linalg-pooling-nhwc-sum-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html b/site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html
    index e18f7b66..877a99c5 100644
    --- a/site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html
    +++ b/site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-op-linalg-reduceaddf-benchmark.log

    2025-05-26 14:41:51 UTC

    2025-05-25T16:34:19+00:00
    +

    deeplearning/dl-op-linalg-reduceaddf-benchmark.log

    2025-05-26 15:04:56 UTC

    2025-05-25T16:34:19+00:00
     Running ./dl-op-linalg-reduceaddf-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html b/site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html
    index 967033fc..84b00774 100644
    --- a/site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html
    +++ b/site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-op-linalg-reducemaxf-benchmark.log

    2025-05-26 14:41:51 UTC

    2025-05-25T16:34:19+00:00
    +

    deeplearning/dl-op-linalg-reducemaxf-benchmark.log

    2025-05-26 15:04:56 UTC

    2025-05-25T16:34:19+00:00
     Running ./dl-op-linalg-reducemaxf-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html b/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html
    index dd0bc482..ef52e71d 100644
    --- a/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html
    +++ b/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.log

    2025-05-26 14:41:51 UTC

    2025-05-25T16:34:19+00:00
    +

    deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.log

    2025-05-26 15:04:56 UTC

    2025-05-25T16:34:19+00:00
     Running ./dl-op-linalg-softmax-exp-sum-div-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html b/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html
    index 93028439..014eaa7b 100644
    --- a/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html
    +++ b/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-op-matmul-transpose-b-benchmark.log

    2025-05-26 14:41:51 UTC

    2025-05-25T16:34:22+00:00
    +

    deeplearning/dl-op-matmul-transpose-b-benchmark.log

    2025-05-26 15:04:56 UTC

    2025-05-25T16:34:22+00:00
     Running ./dl-op-matmul-transpose-b-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-op-tosa-transpose-benchmark.html b/site/deeplearning/dl-op-tosa-transpose-benchmark.html
    index 33e7a1de..cc8ee941 100644
    --- a/site/deeplearning/dl-op-tosa-transpose-benchmark.html
    +++ b/site/deeplearning/dl-op-tosa-transpose-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-op-tosa-transpose-benchmark.log

    2025-05-26 14:41:51 UTC

    2025-05-25T16:34:22+00:00
    +

    deeplearning/dl-op-tosa-transpose-benchmark.log

    2025-05-26 15:04:56 UTC

    2025-05-25T16:34:22+00:00
     Running ./dl-op-tosa-transpose-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/run_results_summary.html b/site/deeplearning/run_results_summary.html
    index 87f85ce5..796bb9be 100644
    --- a/site/deeplearning/run_results_summary.html
    +++ b/site/deeplearning/run_results_summary.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/run_results_summary.log

    2025-05-26 14:41:51 UTC

    [Success] Run of 'dl-model-tinyllama-benchmark'
    +

    deeplearning/run_results_summary.log

    2025-05-26 15:04:56 UTC

    [Success] Run of 'dl-model-tinyllama-benchmark'
     [Success] Run of 'dl-model-mobilenetv3-benchmark'
     [Success] Run of 'dl-model-lenet-benchmark'
     [Missing] Executable not found for 'dl-model-bert-benchmark'
    diff --git a/site/geminiprocessing/build.html b/site/geminiprocessing/build.html
    index 50a394fb..2ea8adf6 100644
    --- a/site/geminiprocessing/build.html
    +++ b/site/geminiprocessing/build.html
    @@ -1,4 +1,4 @@
    -

    geminiprocessing/build.log

    2025-05-26 14:41:51 UTC

    [1/21] Creating directories for 'project_googlebenchmark'
    +

    geminiprocessing/build.log

    2025-05-26 15:04:56 UTC

    [1/21] Creating directories for 'project_googlebenchmark'
     [2/21] Building C object benchmarks/Gemmini/Ops/MatMulOp/CMakeFiles/ExoMatMul.dir/ExoMatmul.c.o
     FAILED: benchmarks/Gemmini/Ops/MatMulOp/CMakeFiles/ExoMatMul.dir/ExoMatmul.c.o 
     riscv64-unknown-linux-gnu-gcc  -I/home/buddy-complier-workspace/buddy-mlir/build/cmake/../../frontend/Interfaces -I/home/buddy-complier-workspace/buddy-mlir/build/cmake/../../thirdparty/include -I/home/buddy-complier-workspace/buddy-benchmark/benchmarks -I/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include -I/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/.. -I/home/xychen/buddy-mlir/frontend/Interfaces -O3 -DNDEBUG -MD -MT benchmarks/Gemmini/Ops/MatMulOp/CMakeFiles/ExoMatMul.dir/ExoMatmul.c.o -MF benchmarks/Gemmini/Ops/MatMulOp/CMakeFiles/ExoMatMul.dir/ExoMatmul.c.o.d -o benchmarks/Gemmini/Ops/MatMulOp/CMakeFiles/ExoMatMul.dir/ExoMatmul.c.o -c /home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c
    diff --git a/site/geminiprocessing/cmake_configure.html b/site/geminiprocessing/cmake_configure.html
    index fdc02bbb..624944dc 100644
    --- a/site/geminiprocessing/cmake_configure.html
    +++ b/site/geminiprocessing/cmake_configure.html
    @@ -1,4 +1,4 @@
    -

    geminiprocessing/cmake_configure.log

    2025-05-26 14:41:51 UTC

    -- The CXX compiler identification is GNU 9.2.0
    +

    geminiprocessing/cmake_configure.log

    2025-05-26 15:04:56 UTC

    -- The CXX compiler identification is GNU 9.2.0
     -- The C compiler identification is GNU 9.2.0
     -- Detecting CXX compiler ABI info
     -- Detecting CXX compiler ABI info - done
    diff --git a/site/vectorization/vectorization_result.html b/site/vectorization/vectorization_result.html
    index 6dd54a0a..58cdf0c0 100644
    --- a/site/vectorization/vectorization_result.html
    +++ b/site/vectorization/vectorization_result.html
    @@ -1,4 +1,4 @@
    -

    vectorization/vectorization_result.log

    2025-05-26 14:41:51 UTC

    Vectorization Benchmark - Mon May 26 14:41:41 UTC 2025
    +

    vectorization/vectorization_result.log

    2025-05-26 15:04:56 UTC

    Vectorization Benchmark - Mon May 26 15:04:46 UTC 2025
     [Info] Starting vectorization-matrix-benchmark build...
     [Info] Running CMake configuration...
     -- The CXX compiler identification is GNU 11.4.0
    @@ -39,8 +39,8 @@ 

    vectorization/vectorization_result.log

    2025-05-26 14:41:51 UTC

    vectorization/vectorization_result.log

    2025-05-26 14:41:51 UTC

    vectorization/vectorization_result.log

    2025-05-26 14:41:51 UTC

    vectorization/vectorization_result.log

    2025-05-26 14:41:51 UTC

    Date: Mon, 26 May 2025 21:38:42 +0200 Subject: [PATCH 14/52] ci: run benchmarks inside reusable docker container --- scripts/logs2html.py | 49 +++++++------ .../build_results_crosscompile_summary.html | 2 +- site/deeplearning/build_results_summary.html | 2 +- site/deeplearning/dl-layer-ffn-benchmark.html | 2 +- .../dl-layer-rmsnorm-benchmark.html | 2 +- .../dl-layer-selfattention-benchmark.html | 2 +- .../dl-model-lenet-benchmark.html | 2 +- .../dl-model-mobilenetv3-benchmark.html | 2 +- .../dl-model-resnet18-benchmark.html | 2 +- .../dl-model-tinyllama-benchmark.html | 2 +- .../dl-model-whisper-benchmark.html | 2 +- .../dl-op-linalg-arithaddf-benchmark.html | 2 +- .../dl-op-linalg-arithdivf-benchmark.html | 2 +- .../dl-op-linalg-arithmulf-benchmark.html | 2 +- .../dl-op-linalg-arithnegf-benchmark.html | 2 +- .../dl-op-linalg-arithsubf-benchmark.html | 2 +- .../dl-op-linalg-batch-matmul-benchmark.html | 2 +- ...-op-linalg-conv2d-nchw-fchw-benchmark.html | 2 +- ...-op-linalg-conv2d-nhwc-fhwc-benchmark.html | 2 +- ...-op-linalg-conv2d-nhwc-hwcf-benchmark.html | 2 +- ...-depthwise-conv-2d-nhwc-hwc-benchmark.html | 2 +- .../dl-op-linalg-mathexp-benchmark.html | 2 +- .../dl-op-linalg-mathfpow-benchmark.html | 2 +- .../dl-op-linalg-mathrsqrt-benchmark.html | 2 +- .../dl-op-linalg-matmul-benchmark.html | 2 +- ...-op-linalg-pooling-nhwc-sum-benchmark.html | 2 +- .../dl-op-linalg-reduceaddf-benchmark.html | 2 +- .../dl-op-linalg-reducemaxf-benchmark.html | 2 +- ...-linalg-softmax-exp-sum-div-benchmark.html | 2 +- .../dl-op-matmul-transpose-b-benchmark.html | 2 +- .../dl-op-tosa-transpose-benchmark.html | 2 +- site/deeplearning/run_results_summary.html | 2 +- site/geminiprocessing/build.html | 2 +- site/geminiprocessing/cmake_configure.html | 2 +- site/index.html | 70 ++++++++++--------- site/vectorization/vectorization_result.html | 42 +++++------ test/test_script_deeplearning.sh | 25 +++++-- test/test_script_imageprocessing.sh | 59 ++++++++++------ 
test/test_script_vectorizationprocessing.sh | 6 +- .../vectorization/vectorization_matrix.json | 68 ++++++++++++++++++ .../vectorization/vectorization_result.log | 42 +++++------ 41 files changed, 268 insertions(+), 159 deletions(-) create mode 100644 test_result/vectorization/vectorization_matrix.json diff --git a/scripts/logs2html.py b/scripts/logs2html.py index 26f423bf..0aff2618 100755 --- a/scripts/logs2html.py +++ b/scripts/logs2html.py @@ -1,33 +1,42 @@ #!/usr/bin/env python3 -"""Convert every *.log under into HTML inside and build an index.""" - -import html, pathlib, datetime, sys +""" +Turn every *.log under into /.html +If a sibling *.json produced by Google Benchmark exists, render +its numbers as an HTML table right under the log. +""" +import html, json, pathlib, datetime, sys src, dst = map(pathlib.Path, sys.argv[1:3]) dst.mkdir(parents=True, exist_ok=True) stamp = datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S UTC") -# --------------------------------------------------------------------------- -# 1) turn each *.log into an HTML page at the same relative location -# --------------------------------------------------------------------------- +def gbench_json_to_table(js_path): + data = json.loads(js_path.read_text())["benchmarks"] + head = "NameTime (ns)CPU (ns)Iterations" + rows = "\n".join( + f"{b['name']}{b['real_time']:.1f}" + f"{b['cpu_time']:.1f}{b['iterations']}" + for b in data if "name" in b + ) + return f"

    Parsed numbers

    {head}{rows}
    " + for log in src.rglob("*.log"): rel = log.relative_to(src) page = dst / rel.with_suffix(".html") page.parent.mkdir(parents=True, exist_ok=True) - page.write_text( - f"

    {rel}

    {stamp}

    {html.escape(log.read_text())}
    " - ) -# --------------------------------------------------------------------------- -# 2) build index.html with links **relative to `dst`** -# --------------------------------------------------------------------------- -links = "\n".join( - f'
  • ' - f'{p.relative_to(dst).as_posix()}
  • ' - for p in sorted(dst.rglob("*.html")) - if p.name != "index.html" -) + body = [f"

    {rel}

    {stamp}

    ", + f"
    {html.escape(log.read_text())}
    "] + + json_peer = log.with_suffix(".json") + if json_peer.exists(): + body.append(gbench_json_to_table(json_peer)) -(dst / "index.html").write_text( - f"

    Buddy-Benchmark results

      \n{links}\n
    " + page.write_text("\n".join(body)) + +# rebuild index +links = "\n".join( + f'
  • {p.relative_to(dst).as_posix()}
  • ' + for p in sorted(dst.rglob("*.html")) if p.name != "index.html" ) +(dst / "index.html").write_text(f"

    Buddy-Benchmark results

      {links}
    ") diff --git a/site/deeplearning/build_results_crosscompile_summary.html b/site/deeplearning/build_results_crosscompile_summary.html index fb9230a8..e45f5d17 100644 --- a/site/deeplearning/build_results_crosscompile_summary.html +++ b/site/deeplearning/build_results_crosscompile_summary.html @@ -1,4 +1,4 @@ -

    deeplearning/build_results_crosscompile_summary.log

    2025-05-26 15:04:56 UTC

    [Failed]  Build of 'dl-model-tinyllama-benchmark'
    +

    deeplearning/build_results_crosscompile_summary.log

    2025-05-26 19:08:59 UTC

    [Failed]  Build of 'dl-model-tinyllama-benchmark'
     [Failed]  Build of 'dl-model-mobilenetv3-benchmark'
     [Success] Build of 'dl-model-lenet-benchmark'
     [Failed]  Build of 'dl-model-bert-benchmark'
    diff --git a/site/deeplearning/build_results_summary.html b/site/deeplearning/build_results_summary.html
    index 358cd47d..fc695e24 100644
    --- a/site/deeplearning/build_results_summary.html
    +++ b/site/deeplearning/build_results_summary.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/build_results_summary.log

    2025-05-26 15:04:56 UTC

    [Success] Build of 'dl-model-tinyllama-benchmark'
    +

    deeplearning/build_results_summary.log

    2025-05-26 19:08:59 UTC

    [Success] Build of 'dl-model-tinyllama-benchmark'
     [Success] Build of 'dl-model-mobilenetv3-benchmark'
     [Success] Build of 'dl-model-lenet-benchmark'
     [Failed]  Build of 'dl-model-bert-benchmark'
    diff --git a/site/deeplearning/dl-layer-ffn-benchmark.html b/site/deeplearning/dl-layer-ffn-benchmark.html
    index ff96b640..98364083 100644
    --- a/site/deeplearning/dl-layer-ffn-benchmark.html
    +++ b/site/deeplearning/dl-layer-ffn-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-layer-ffn-benchmark.log

    2025-05-26 15:04:56 UTC

    2025-05-25T16:33:30+00:00
    +

    deeplearning/dl-layer-ffn-benchmark.log

    2025-05-26 19:08:59 UTC

    2025-05-25T16:33:30+00:00
     Running ./dl-layer-ffn-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-layer-rmsnorm-benchmark.html b/site/deeplearning/dl-layer-rmsnorm-benchmark.html
    index 7a5aa827..e99464ce 100644
    --- a/site/deeplearning/dl-layer-rmsnorm-benchmark.html
    +++ b/site/deeplearning/dl-layer-rmsnorm-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-layer-rmsnorm-benchmark.log

    2025-05-26 15:04:56 UTC

    2025-05-25T16:33:34+00:00
    +

    deeplearning/dl-layer-rmsnorm-benchmark.log

    2025-05-26 19:08:59 UTC

    2025-05-25T16:33:34+00:00
     Running ./dl-layer-rmsnorm-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-layer-selfattention-benchmark.html b/site/deeplearning/dl-layer-selfattention-benchmark.html
    index 373f4fd5..a7abafc0 100644
    --- a/site/deeplearning/dl-layer-selfattention-benchmark.html
    +++ b/site/deeplearning/dl-layer-selfattention-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-layer-selfattention-benchmark.log

    2025-05-26 15:04:56 UTC

    2025-05-25T16:33:32+00:00
    +

    deeplearning/dl-layer-selfattention-benchmark.log

    2025-05-26 19:08:59 UTC

    2025-05-25T16:33:32+00:00
     Running ./dl-layer-selfattention-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-model-lenet-benchmark.html b/site/deeplearning/dl-model-lenet-benchmark.html
    index 54a60e30..872705a2 100644
    --- a/site/deeplearning/dl-model-lenet-benchmark.html
    +++ b/site/deeplearning/dl-model-lenet-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-model-lenet-benchmark.log

    2025-05-26 15:04:56 UTC

    2025-05-25T16:29:36+00:00
    +

    deeplearning/dl-model-lenet-benchmark.log

    2025-05-26 19:08:59 UTC

    2025-05-25T16:29:36+00:00
     Running ./dl-model-lenet-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-model-mobilenetv3-benchmark.html b/site/deeplearning/dl-model-mobilenetv3-benchmark.html
    index 19f7c5ec..bc2fba6e 100644
    --- a/site/deeplearning/dl-model-mobilenetv3-benchmark.html
    +++ b/site/deeplearning/dl-model-mobilenetv3-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-model-mobilenetv3-benchmark.log

    2025-05-26 15:04:56 UTC

    2025-05-25T16:29:34+00:00
    +

    deeplearning/dl-model-mobilenetv3-benchmark.log

    2025-05-26 19:08:59 UTC

    2025-05-25T16:29:34+00:00
     Running ./dl-model-mobilenetv3-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-model-resnet18-benchmark.html b/site/deeplearning/dl-model-resnet18-benchmark.html
    index dfe83825..9aaf6fe6 100644
    --- a/site/deeplearning/dl-model-resnet18-benchmark.html
    +++ b/site/deeplearning/dl-model-resnet18-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-model-resnet18-benchmark.log

    2025-05-26 15:04:56 UTC

    2025-05-25T16:33:27+00:00
    +

    deeplearning/dl-model-resnet18-benchmark.log

    2025-05-26 19:08:59 UTC

    2025-05-25T16:33:27+00:00
     Running ./dl-model-resnet18-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-model-tinyllama-benchmark.html b/site/deeplearning/dl-model-tinyllama-benchmark.html
    index d24b04f9..b6de06b3 100644
    --- a/site/deeplearning/dl-model-tinyllama-benchmark.html
    +++ b/site/deeplearning/dl-model-tinyllama-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-model-tinyllama-benchmark.log

    2025-05-26 15:04:56 UTC

    2025-05-25T16:24:01+00:00
    +

    deeplearning/dl-model-tinyllama-benchmark.log

    2025-05-26 19:08:59 UTC

    2025-05-25T16:24:01+00:00
     Running ./dl-model-tinyllama-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-model-whisper-benchmark.html b/site/deeplearning/dl-model-whisper-benchmark.html
    index 6dc1c8ca..c691b8f8 100644
    --- a/site/deeplearning/dl-model-whisper-benchmark.html
    +++ b/site/deeplearning/dl-model-whisper-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-model-whisper-benchmark.log

    2025-05-26 15:04:56 UTC

    2025-05-25T16:29:38+00:00
    +

    deeplearning/dl-model-whisper-benchmark.log

    2025-05-26 19:08:59 UTC

    2025-05-25T16:29:38+00:00
     Running ./dl-model-whisper-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html b/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html
    index dd5ed09d..bf5ce9c2 100644
    --- a/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html
    +++ b/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-op-linalg-arithaddf-benchmark.log

    2025-05-26 15:04:56 UTC

    2025-05-25T16:34:03+00:00
    +

    deeplearning/dl-op-linalg-arithaddf-benchmark.log

    2025-05-26 19:08:59 UTC

    2025-05-25T16:34:03+00:00
     Running ./dl-op-linalg-arithaddf-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html b/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html
    index 724af897..f7020ba6 100644
    --- a/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html
    +++ b/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-op-linalg-arithdivf-benchmark.log

    2025-05-26 15:04:56 UTC

    2025-05-25T16:34:05+00:00
    +

    deeplearning/dl-op-linalg-arithdivf-benchmark.log

    2025-05-26 19:08:59 UTC

    2025-05-25T16:34:05+00:00
     Running ./dl-op-linalg-arithdivf-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html b/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html
    index ab5f539b..941d1b4f 100644
    --- a/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html
    +++ b/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-op-linalg-arithmulf-benchmark.log

    2025-05-26 15:04:56 UTC

    2025-05-25T16:34:07+00:00
    +

    deeplearning/dl-op-linalg-arithmulf-benchmark.log

    2025-05-26 19:08:59 UTC

    2025-05-25T16:34:07+00:00
     Running ./dl-op-linalg-arithmulf-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html b/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html
    index 196fc7d3..b29cfae1 100644
    --- a/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html
    +++ b/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-op-linalg-arithnegf-benchmark.log

    2025-05-26 15:04:56 UTC

    2025-05-25T16:34:09+00:00
    +

    deeplearning/dl-op-linalg-arithnegf-benchmark.log

    2025-05-26 19:08:59 UTC

    2025-05-25T16:34:09+00:00
     Running ./dl-op-linalg-arithnegf-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html b/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html
    index b6d8e091..8b1fa53f 100644
    --- a/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html
    +++ b/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-op-linalg-arithsubf-benchmark.log

    2025-05-26 15:04:56 UTC

    2025-05-25T16:34:11+00:00
    +

    deeplearning/dl-op-linalg-arithsubf-benchmark.log

    2025-05-26 19:08:59 UTC

    2025-05-25T16:34:11+00:00
     Running ./dl-op-linalg-arithsubf-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html b/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html
    index df91d597..b40b1946 100644
    --- a/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html
    +++ b/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-op-linalg-batch-matmul-benchmark.log

    2025-05-26 15:04:56 UTC

    2025-05-25T16:33:53+00:00
    +

    deeplearning/dl-op-linalg-batch-matmul-benchmark.log

    2025-05-26 19:08:59 UTC

    2025-05-25T16:33:53+00:00
     Running ./dl-op-linalg-batch-matmul-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html b/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html
    index 470eddc0..77af63d5 100644
    --- a/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html
    +++ b/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.log

    2025-05-26 15:04:56 UTC

    2025-05-25T16:33:46+00:00
    +

    deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.log

    2025-05-26 19:08:59 UTC

    2025-05-25T16:33:46+00:00
     Running ./dl-op-linalg-conv2d-nchw-fchw-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html b/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html
    index aedb3e11..3bed0222 100644
    --- a/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html
    +++ b/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.log

    2025-05-26 15:04:56 UTC

    2025-05-25T16:33:50+00:00
    +

    deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.log

    2025-05-26 19:08:59 UTC

    2025-05-25T16:33:50+00:00
     Running ./dl-op-linalg-conv2d-nhwc-fhwc-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html b/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html
    index 470c07c3..8a27412a 100644
    --- a/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html
    +++ b/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.log

    2025-05-26 15:04:56 UTC

    2025-05-25T16:33:48+00:00
    +

    deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.log

    2025-05-26 19:08:59 UTC

    2025-05-25T16:33:48+00:00
     Running ./dl-op-linalg-conv2d-nhwc-hwcf-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html b/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html
    index f8ddea55..5d4dc2f6 100644
    --- a/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html
    +++ b/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.log

    2025-05-26 15:04:56 UTC

    2025-05-25T16:33:50+00:00
    +

    deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.log

    2025-05-26 19:08:59 UTC

    2025-05-25T16:33:50+00:00
     Running ./dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-op-linalg-mathexp-benchmark.html b/site/deeplearning/dl-op-linalg-mathexp-benchmark.html
    index 031f97ea..89c239e9 100644
    --- a/site/deeplearning/dl-op-linalg-mathexp-benchmark.html
    +++ b/site/deeplearning/dl-op-linalg-mathexp-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-op-linalg-mathexp-benchmark.log

    2025-05-26 15:04:56 UTC

    2025-05-25T16:34:17+00:00
    +

    deeplearning/dl-op-linalg-mathexp-benchmark.log

    2025-05-26 19:08:59 UTC

    2025-05-25T16:34:17+00:00
     Running ./dl-op-linalg-mathexp-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html b/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html
    index 50282df9..5b5ec0c5 100644
    --- a/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html
    +++ b/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-op-linalg-mathfpow-benchmark.log

    2025-05-26 15:04:56 UTC

    2025-05-25T16:34:13+00:00
    +

    deeplearning/dl-op-linalg-mathfpow-benchmark.log

    2025-05-26 19:08:59 UTC

    2025-05-25T16:34:13+00:00
     Running ./dl-op-linalg-mathfpow-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html b/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html
    index 2b883a58..dedee70f 100644
    --- a/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html
    +++ b/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-op-linalg-mathrsqrt-benchmark.log

    2025-05-26 15:04:56 UTC

    2025-05-25T16:34:15+00:00
    +

    deeplearning/dl-op-linalg-mathrsqrt-benchmark.log

    2025-05-26 19:08:59 UTC

    2025-05-25T16:34:15+00:00
     Running ./dl-op-linalg-mathrsqrt-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-op-linalg-matmul-benchmark.html b/site/deeplearning/dl-op-linalg-matmul-benchmark.html
    index 204a97cb..72d51506 100644
    --- a/site/deeplearning/dl-op-linalg-matmul-benchmark.html
    +++ b/site/deeplearning/dl-op-linalg-matmul-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-op-linalg-matmul-benchmark.log

    2025-05-26 15:04:56 UTC

    2025-05-25T16:33:36+00:00
    +

    deeplearning/dl-op-linalg-matmul-benchmark.log

    2025-05-26 19:08:59 UTC

    2025-05-25T16:33:36+00:00
     Running ./dl-op-linalg-matmul-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html b/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html
    index 59ad95b4..48049bd1 100644
    --- a/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html
    +++ b/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.log

    2025-05-26 15:04:56 UTC

    2025-05-25T16:33:51+00:00
    +

    deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.log

    2025-05-26 19:08:59 UTC

    2025-05-25T16:33:51+00:00
     Running ./dl-op-linalg-pooling-nhwc-sum-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html b/site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html
    index 877a99c5..68a441c2 100644
    --- a/site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html
    +++ b/site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-op-linalg-reduceaddf-benchmark.log

    2025-05-26 15:04:56 UTC

    2025-05-25T16:34:19+00:00
    +

    deeplearning/dl-op-linalg-reduceaddf-benchmark.log

    2025-05-26 19:08:59 UTC

    2025-05-25T16:34:19+00:00
     Running ./dl-op-linalg-reduceaddf-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html b/site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html
    index 84b00774..b8d684b1 100644
    --- a/site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html
    +++ b/site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-op-linalg-reducemaxf-benchmark.log

    2025-05-26 15:04:56 UTC

    2025-05-25T16:34:19+00:00
    +

    deeplearning/dl-op-linalg-reducemaxf-benchmark.log

    2025-05-26 19:08:59 UTC

    2025-05-25T16:34:19+00:00
     Running ./dl-op-linalg-reducemaxf-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html b/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html
    index ef52e71d..48827bd4 100644
    --- a/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html
    +++ b/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.log

    2025-05-26 15:04:56 UTC

    2025-05-25T16:34:19+00:00
    +

    deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.log

    2025-05-26 19:08:59 UTC

    2025-05-25T16:34:19+00:00
     Running ./dl-op-linalg-softmax-exp-sum-div-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html b/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html
    index 014eaa7b..d179f971 100644
    --- a/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html
    +++ b/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-op-matmul-transpose-b-benchmark.log

    2025-05-26 15:04:56 UTC

    2025-05-25T16:34:22+00:00
    +

    deeplearning/dl-op-matmul-transpose-b-benchmark.log

    2025-05-26 19:08:59 UTC

    2025-05-25T16:34:22+00:00
     Running ./dl-op-matmul-transpose-b-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-op-tosa-transpose-benchmark.html b/site/deeplearning/dl-op-tosa-transpose-benchmark.html
    index cc8ee941..61decf7a 100644
    --- a/site/deeplearning/dl-op-tosa-transpose-benchmark.html
    +++ b/site/deeplearning/dl-op-tosa-transpose-benchmark.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/dl-op-tosa-transpose-benchmark.log

    2025-05-26 15:04:56 UTC

    2025-05-25T16:34:22+00:00
    +

    deeplearning/dl-op-tosa-transpose-benchmark.log

    2025-05-26 19:08:59 UTC

    2025-05-25T16:34:22+00:00
     Running ./dl-op-tosa-transpose-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/run_results_summary.html b/site/deeplearning/run_results_summary.html
    index 796bb9be..38230663 100644
    --- a/site/deeplearning/run_results_summary.html
    +++ b/site/deeplearning/run_results_summary.html
    @@ -1,4 +1,4 @@
    -

    deeplearning/run_results_summary.log

    2025-05-26 15:04:56 UTC

    [Success] Run of 'dl-model-tinyllama-benchmark'
    +

    deeplearning/run_results_summary.log

    2025-05-26 19:08:59 UTC

    [Success] Run of 'dl-model-tinyllama-benchmark'
     [Success] Run of 'dl-model-mobilenetv3-benchmark'
     [Success] Run of 'dl-model-lenet-benchmark'
     [Missing] Executable not found for 'dl-model-bert-benchmark'
    diff --git a/site/geminiprocessing/build.html b/site/geminiprocessing/build.html
    index 2ea8adf6..a87bce5e 100644
    --- a/site/geminiprocessing/build.html
    +++ b/site/geminiprocessing/build.html
    @@ -1,4 +1,4 @@
    -

    geminiprocessing/build.log

    2025-05-26 15:04:56 UTC

    [1/21] Creating directories for 'project_googlebenchmark'
    +

    geminiprocessing/build.log

    2025-05-26 19:08:59 UTC

    [1/21] Creating directories for 'project_googlebenchmark'
     [2/21] Building C object benchmarks/Gemmini/Ops/MatMulOp/CMakeFiles/ExoMatMul.dir/ExoMatmul.c.o
     FAILED: benchmarks/Gemmini/Ops/MatMulOp/CMakeFiles/ExoMatMul.dir/ExoMatmul.c.o 
     riscv64-unknown-linux-gnu-gcc  -I/home/buddy-complier-workspace/buddy-mlir/build/cmake/../../frontend/Interfaces -I/home/buddy-complier-workspace/buddy-mlir/build/cmake/../../thirdparty/include -I/home/buddy-complier-workspace/buddy-benchmark/benchmarks -I/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include -I/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/.. -I/home/xychen/buddy-mlir/frontend/Interfaces -O3 -DNDEBUG -MD -MT benchmarks/Gemmini/Ops/MatMulOp/CMakeFiles/ExoMatMul.dir/ExoMatmul.c.o -MF benchmarks/Gemmini/Ops/MatMulOp/CMakeFiles/ExoMatMul.dir/ExoMatmul.c.o.d -o benchmarks/Gemmini/Ops/MatMulOp/CMakeFiles/ExoMatMul.dir/ExoMatmul.c.o -c /home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c
    diff --git a/site/geminiprocessing/cmake_configure.html b/site/geminiprocessing/cmake_configure.html
    index 624944dc..0326dc85 100644
    --- a/site/geminiprocessing/cmake_configure.html
    +++ b/site/geminiprocessing/cmake_configure.html
    @@ -1,4 +1,4 @@
    -

    geminiprocessing/cmake_configure.log

    2025-05-26 15:04:56 UTC

    -- The CXX compiler identification is GNU 9.2.0
    +

    geminiprocessing/cmake_configure.log

    2025-05-26 19:08:59 UTC

    -- The CXX compiler identification is GNU 9.2.0
     -- The C compiler identification is GNU 9.2.0
     -- Detecting CXX compiler ABI info
     -- Detecting CXX compiler ABI info - done
    diff --git a/site/index.html b/site/index.html
    index f1071e42..fa9d1d36 100644
    --- a/site/index.html
    +++ b/site/index.html
    @@ -1,34 +1,36 @@
    -

    Buddy-Benchmark results

    \ No newline at end of file +

    Buddy-Benchmark results

    \ No newline at end of file diff --git a/site/vectorization/vectorization_result.html b/site/vectorization/vectorization_result.html index 58cdf0c0..52baeca9 100644 --- a/site/vectorization/vectorization_result.html +++ b/site/vectorization/vectorization_result.html @@ -1,4 +1,4 @@ -

    vectorization/vectorization_result.log

    2025-05-26 15:04:56 UTC

    Vectorization Benchmark - Mon May 26 15:04:46 UTC 2025
    +

    vectorization/vectorization_result.log

    2025-05-26 19:08:59 UTC

    Vectorization Benchmark - Mon May 26 19:08:49 UTC 2025
     [Info] Starting vectorization-matrix-benchmark build...
     [Info] Running CMake configuration...
     -- The CXX compiler identification is GNU 11.4.0
    @@ -128,24 +128,24 @@ 

    vectorization/vectorization_result.log

    2025-05-26 15:04:56 UTC

    vectorization/vectorization_result.log

    2025-05-26 15:04:56 UTC

    vectorization/vectorization_result.log

    2025-05-26 15:04:56 UTC

    "${RUN_LOG}" # Clear/create the file +> "${RUN_LOG}" # clear / create the file echo "[Info] Running all benchmarks in ./bin..." for target in "${BENCHMARK_TARGETS[@]}"; do - if [ -f "${target}" ]; then + if [[ -f "${target}" ]]; then echo "==> Running ${target}" - if "./${target}" > "${BENCHMARK_PATH}/test_result/deeplearning/${target}.log" 2>&1; then - echo "[Success] Run of '${target}'" | tee -a "${RUN_LOG}" - echo " Output saved to test_result/deeplearning/${target}.log" + + # ---- NEW: dump a machine-readable report next to the plain log ----------- + json_out="${BENCHMARK_PATH}/test_result/deeplearning/${target}.json" + + if "./${target}" \ + --benchmark_out="${json_out}" \ + --benchmark_out_format=json \ + > "${BENCHMARK_PATH}/test_result/deeplearning/${target}.log" 2>&1 + then + echo "[Success] Run of '${target}'" | tee -a "${RUN_LOG}" + echo " ↳ stdout/stderr → ${target}.log" | tee -a "${RUN_LOG}" + echo " ↳ gbench JSON → ${target}.json" | tee -a "${RUN_LOG}" else - echo "[Failed] Run of '${target}'" | tee -a "${RUN_LOG}" - echo " Output saved to test_result/deeplearning/${target}.log (May contain error info)" + echo "[Failed] Run of '${target}'" | tee -a "${RUN_LOG}" + echo " ↳ stdout/stderr → ${target}.log (may contain errors)" | tee -a "${RUN_LOG}" fi + # ------------------------------------------------------------------------- else echo "[Missing] Executable not found for '${target}'" | tee -a "${RUN_LOG}" fi diff --git a/test/test_script_imageprocessing.sh b/test/test_script_imageprocessing.sh index ab89c912..ffbffa7c 100755 --- a/test/test_script_imageprocessing.sh +++ b/test/test_script_imageprocessing.sh @@ -25,28 +25,43 @@ boundaries=("CONSTANT_PADDING" "REPLICATE_PADDING") for feature in "${features[@]}"; do echo "Testing $feature support" | tee -a "$LOG" if supports "$feature"; then - echo "$feature is supported." | tee -a "$LOG" - mkdir -p build_${feature} && cd build_${feature} - cmake -G Ninja .. 
\ - -DCMAKE_BUILD_TYPE=RELEASE \ - -DIMAGE_PROCESSING_BENCHMARKS=ON \ - -DOpenCV_DIR=$PWD/../thirdparty/opencv/build/ \ - -DEIGEN_DIR=$PWD/../thirdparty/eigen/ \ - -DBUDDY_OPT_ATTR=$(echo "$feature" | tr '[:upper:]' '[:lower:]') \ - -DBUDDY_MLIR_BUILD_DIR=/home/buddy-complier-workspace/buddy-mlir/build - ninja image-processing-benchmark - echo "Running image-processing-benchmark for $feature" | tee -a "$LOG" - for img in "${images[@]}"; do - for kern in "${kernels[@]}"; do - for morph in "${kernelmorphs[@]}"; do - for boundary in "${boundaries[@]}"; do - echo "Running: $img $kern $morph $boundary" | tee -a "$LOG" - ./bin/image-processing-benchmark "$img" "$kern" "$morph" "$boundary" 2>&1 | grep -v "Saved PNG file." >> "$LOG" - done - done - done - done - cd .. + echo "$feature is supported." | tee -a "$LOG" + mkdir -p build_${feature} && cd build_${feature} + cmake -G Ninja .. \ + -DCMAKE_BUILD_TYPE=RELEASE \ + -DIMAGE_PROCESSING_BENCHMARKS=ON \ + -DOpenCV_DIR=$PWD/../thirdparty/opencv/build/ \ + -DEIGEN_DIR=$PWD/../thirdparty/eigen/ \ + -DBUDDY_OPT_ATTR=$(echo "$feature" | tr '[:upper:]' '[:lower:]') \ + -DBUDDY_MLIR_BUILD_DIR=/home/buddy-complier-workspace/buddy-mlir/build + ninja image-processing-benchmark + echo "Running image-processing-benchmark for $feature" | tee -a "$LOG" + # --------------------------------------------------------------------------- + # inside the big loop – ONLY this section is changed + # --------------------------------------------------------------------------- + for img in "${images[@]}"; do + img_slug=$(basename "$img" .png) # YuTu → YuTu + for kern in "${kernels[@]}"; do + for morph in "${kernelmorphs[@]}"; do + for boundary in "${boundaries[@]}"; do + echo "Running: $img $kern $morph $boundary" | tee -a "$LOG" + + # ---- NEW: build a unique JSON filename --------------------------------- + slug="$(echo "${feature}_${img_slug}_${kern}_${morph}_${boundary}" \ + | tr ' /' '__')" + json_out="${RESULT_DIR}/${slug}.json" + # 
----------------------------------------------------------------------- + + ./bin/image-processing-benchmark \ + "$img" "$kern" "$morph" "$boundary" \ + --benchmark_out="$json_out" \ + --benchmark_out_format=json \ + 2>&1 | grep -v "Saved PNG file." >> "$LOG" + done + done + done + done + cd .. else echo "CPU does not support $feature." | tee -a "$LOG" fi diff --git a/test/test_script_vectorizationprocessing.sh b/test/test_script_vectorizationprocessing.sh index 5a3e8584..2e081dcb 100755 --- a/test/test_script_vectorizationprocessing.sh +++ b/test/test_script_vectorizationprocessing.sh @@ -49,6 +49,10 @@ export QEMU_LD_PREFIX=/usr/riscv64-linux-gnu ################################################################################ cd bin echo "[Info] Running vectorization-matrix-benchmark..." | tee -a "${LOG_FILE}" -./vectorization-matrix-benchmark 2>&1 | tee -a "${LOG_FILE}" +./vectorization-matrix-benchmark \ + --benchmark_out="${RESULT_DIR}/vectorization_matrix.json" \ + --benchmark_out_format=json \ + 2>&1 | tee -a "${LOG_FILE}" + echo "[Info] Benchmark completed. 
Log saved to ${LOG_FILE}" \ No newline at end of file diff --git a/test_result/vectorization/vectorization_matrix.json b/test_result/vectorization/vectorization_matrix.json new file mode 100644 index 00000000..6413c8d4 --- /dev/null +++ b/test_result/vectorization/vectorization_matrix.json @@ -0,0 +1,68 @@ +{ + "context": { + "date": "2025-05-26T19:37:29+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./vectorization-matrix-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [0.331543,0.212402,1.35498], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "MLIR_MatMul/1", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "MLIR_MatMul/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 37518184, + "real_time": 1.8746903146081575e+01, + "cpu_time": 1.8744373554967371e+01, + "time_unit": "ns" + }, + { + "name": "MLIR_MatVec/1", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "MLIR_MatVec/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 34644700, + "real_time": 2.0348688271988031e+01, + "cpu_time": 2.0346287137715148e+01, + "time_unit": "ns" + } + ] +} diff --git a/test_result/vectorization/vectorization_result.log b/test_result/vectorization/vectorization_result.log index 8e496642..290831b7 100755 --- a/test_result/vectorization/vectorization_result.log +++ b/test_result/vectorization/vectorization_result.log @@ -1,4 +1,4 @@ -Vectorization Benchmark - Mon May 26 15:04:46 UTC 2025 +Vectorization Benchmark - 
Mon May 26 19:37:22 UTC 2025 [Info] Starting vectorization-matrix-benchmark build... [Info] Running CMake configuration... -- The CXX compiler identification is GNU 11.4.0 @@ -37,8 +37,8 @@ Vectorization Benchmark - Mon May 26 15:04:46 UTC 2025 -- Generating done -- Build files have been written to: /home/buddy-complier-workspace/buddy-benchmark/build [Info] Building vectorization-matrix-benchmark... -[1/17] Generating mlir-matmul.o -[2/17] Generating mlir-matvec.o +[1/17] Generating mlir-matvec.o +[2/17] Generating mlir-matmul.o [3/17] Linking CXX static library benchmarks/Vectorization/libMLIRMatMul.a [4/17] Linking CXX static library benchmarks/Vectorization/libMLIRMatVec.a [5/17] Creating directories for 'project_googlebenchmark' @@ -128,24 +128,24 @@ Call Stack (most recent call first): -- Generating done -- Build files have been written to: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/src/project_googlebenchmark-build [10/17] Performing build step for 'project_googlebenchmark' -[1/22] Building CXX object src/CMakeFiles/benchmark_main.dir/benchmark_main.cc.o +[1/22] Building CXX object src/CMakeFiles/benchmark.dir/counter.cc.o [2/22] Building CXX object src/CMakeFiles/benchmark.dir/sleep.cc.o -[3/22] Building CXX object src/CMakeFiles/benchmark.dir/counter.cc.o +[3/22] Building CXX object src/CMakeFiles/benchmark_main.dir/benchmark_main.cc.o [4/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark_name.cc.o -[5/22] Building CXX object src/CMakeFiles/benchmark.dir/timers.cc.o -[6/22] Building CXX object src/CMakeFiles/benchmark.dir/perf_counters.cc.o -[7/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark_api_internal.cc.o -[8/22] Building CXX object src/CMakeFiles/benchmark.dir/colorprint.cc.o -[9/22] Building CXX object src/CMakeFiles/benchmark.dir/reporter.cc.o +[5/22] Building CXX object src/CMakeFiles/benchmark.dir/colorprint.cc.o +[6/22] Building CXX object 
src/CMakeFiles/benchmark.dir/benchmark_api_internal.cc.o +[7/22] Building CXX object src/CMakeFiles/benchmark.dir/perf_counters.cc.o +[8/22] Building CXX object src/CMakeFiles/benchmark.dir/console_reporter.cc.o +[9/22] Building CXX object src/CMakeFiles/benchmark.dir/timers.cc.o [10/22] Building CXX object src/CMakeFiles/benchmark.dir/commandlineflags.cc.o -[11/22] Building CXX object src/CMakeFiles/benchmark.dir/console_reporter.cc.o -[12/22] Building CXX object src/CMakeFiles/benchmark.dir/csv_reporter.cc.o -[13/22] Building CXX object src/CMakeFiles/benchmark.dir/string_util.cc.o -[14/22] Building CXX object src/CMakeFiles/benchmark.dir/json_reporter.cc.o +[11/22] Building CXX object src/CMakeFiles/benchmark.dir/reporter.cc.o +[12/22] Building CXX object src/CMakeFiles/benchmark.dir/string_util.cc.o +[13/22] Building CXX object src/CMakeFiles/benchmark.dir/csv_reporter.cc.o +[14/22] Building CXX object src/CMakeFiles/benchmark.dir/complexity.cc.o [15/22] Building CXX object src/CMakeFiles/benchmark.dir/sysinfo.cc.o -[16/22] Building CXX object src/CMakeFiles/benchmark.dir/complexity.cc.o -[17/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark_runner.cc.o -[18/22] Building CXX object src/CMakeFiles/benchmark.dir/statistics.cc.o +[16/22] Building CXX object src/CMakeFiles/benchmark.dir/json_reporter.cc.o +[17/22] Building CXX object src/CMakeFiles/benchmark.dir/statistics.cc.o +[18/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark_runner.cc.o [19/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark.cc.o [20/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark_register.cc.o [21/22] Linking CXX static library src/libbenchmark.a @@ -180,7 +180,7 @@ Call Stack (most recent call first): [16/17] Building CXX object benchmarks/Vectorization/CMakeFiles/vectorization-matrix-benchmark.dir/MLIRMatMulBenchmark.cpp.o [17/17] Linking CXX executable bin/vectorization-matrix-benchmark [Info] Running vectorization-matrix-benchmark... 
-2025-05-26T15:04:52+00:00 +2025-05-26T19:37:29+00:00 Running ./vectorization-matrix-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -188,13 +188,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 25.57, 18.02, 9.67 +Load Average: 0.33, 0.21, 1.35 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. -------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------- -MLIR_MatMul/1 19.4 ns 19.4 ns 35846848 -MLIR_MatVec/1 21.3 ns 21.3 ns 32810882 +MLIR_MatMul/1 18.7 ns 18.7 ns 37518184 +MLIR_MatVec/1 20.3 ns 20.3 ns 34644700 -------------------------------------------------------- MLIR_MatMul: MLIR MatMul Operation + Nested Loop [ 18 18 18 18 18 18 18 18 18 18 ] From da02b21c250eeaae2708e5cd9d1f869d54679a5b Mon Sep 17 00:00:00 2001 From: Q Liu <52538137+LIUQyou@users.noreply.github.com> Date: Mon, 26 May 2025 21:55:27 +0200 Subject: [PATCH 15/52] ci: run benchmarks inside reusable docker container --- scripts/logs2html.py | 56 +++++++++++++------ .../build_results_crosscompile_summary.html | 3 +- site/deeplearning/build_results_summary.html | 3 +- site/deeplearning/dl-layer-ffn-benchmark.html | 3 +- .../dl-layer-rmsnorm-benchmark.html | 3 +- .../dl-layer-selfattention-benchmark.html | 3 +- .../dl-model-lenet-benchmark.html | 3 +- .../dl-model-mobilenetv3-benchmark.html | 3 +- .../dl-model-resnet18-benchmark.html | 3 +- .../dl-model-tinyllama-benchmark.html | 3 +- .../dl-model-whisper-benchmark.html | 3 +- .../dl-op-linalg-arithaddf-benchmark.html | 3 +- .../dl-op-linalg-arithdivf-benchmark.html | 3 +- .../dl-op-linalg-arithmulf-benchmark.html | 3 +- .../dl-op-linalg-arithnegf-benchmark.html | 3 +- .../dl-op-linalg-arithsubf-benchmark.html | 3 +- .../dl-op-linalg-batch-matmul-benchmark.html | 3 +- ...-op-linalg-conv2d-nchw-fchw-benchmark.html | 3 +- 
...-op-linalg-conv2d-nhwc-fhwc-benchmark.html | 3 +- ...-op-linalg-conv2d-nhwc-hwcf-benchmark.html | 3 +- ...-depthwise-conv-2d-nhwc-hwc-benchmark.html | 3 +- .../dl-op-linalg-mathexp-benchmark.html | 3 +- .../dl-op-linalg-mathfpow-benchmark.html | 3 +- .../dl-op-linalg-mathrsqrt-benchmark.html | 3 +- .../dl-op-linalg-matmul-benchmark.html | 3 +- ...-op-linalg-pooling-nhwc-sum-benchmark.html | 3 +- .../dl-op-linalg-reduceaddf-benchmark.html | 3 +- .../dl-op-linalg-reducemaxf-benchmark.html | 3 +- ...-linalg-softmax-exp-sum-div-benchmark.html | 3 +- .../dl-op-matmul-transpose-b-benchmark.html | 3 +- .../dl-op-tosa-transpose-benchmark.html | 3 +- site/deeplearning/run_results_summary.html | 3 +- site/geminiprocessing/build.html | 3 +- site/geminiprocessing/cmake_configure.html | 3 +- site/index.html | 6 +- site/vectorization/vectorization_result.html | 39 ++++++------- .../vectorization/vectorization_matrix.json | 16 +++--- .../vectorization/vectorization_result.log | 40 ++++++------- 38 files changed, 155 insertions(+), 101 deletions(-) diff --git a/scripts/logs2html.py b/scripts/logs2html.py index 0aff2618..3123d0ff 100755 --- a/scripts/logs2html.py +++ b/scripts/logs2html.py @@ -1,42 +1,64 @@ #!/usr/bin/env python3 """ -Turn every *.log under into /.html -If a sibling *.json produced by Google Benchmark exists, render -its numbers as an HTML table right under the log. +Turn every *.json under into /.html. +If a twin *.log exists (same stem), show it in a collapsible
    . """ + import html, json, pathlib, datetime, sys src, dst = map(pathlib.Path, sys.argv[1:3]) dst.mkdir(parents=True, exist_ok=True) stamp = datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S UTC") -def gbench_json_to_table(js_path): +CSS = """ + +""" + +def gbench_json_to_table(js_path: pathlib.Path) -> str: data = json.loads(js_path.read_text())["benchmarks"] - head = "NameTime (ns)CPU (ns)Iterations" + head = ("NameTime (ns)" + "CPU (ns)Iterations") rows = "\n".join( - f"{b['name']}{b['real_time']:.1f}" - f"{b['cpu_time']:.1f}{b['iterations']}" + f"{html.escape(b['name'])}" + f"{b['real_time']:.1f}" + f"{b['cpu_time']:.1f}" + f"{b['iterations']}" for b in data if "name" in b ) - return f"

    Parsed numbers

    {head}{rows}
    " + return f"

    {js_path.name}

    \n{head}\n{rows}
    " + +# --------------------------------------------------------------------------- -for log in src.rglob("*.log"): - rel = log.relative_to(src) +for js in src.rglob("*.json"): + log = js.with_suffix(".log") # same stem, optional + rel = js.relative_to(src) page = dst / rel.with_suffix(".html") page.parent.mkdir(parents=True, exist_ok=True) - body = [f"

    {rel}

    {stamp}

    ", - f"
    {html.escape(log.read_text())}
    "] + body = [CSS, + f"

    {rel}

    {stamp}

    ", + gbench_json_to_table(js)] - json_peer = log.with_suffix(".json") - if json_peer.exists(): - body.append(gbench_json_to_table(json_peer)) + if log.exists(): # include console output if present + body.append("
    Console output\n" + f"
    {html.escape(log.read_text())}
    ") page.write_text("\n".join(body)) +# --------------------------------------------------------------------------- # rebuild index +# --------------------------------------------------------------------------- links = "\n".join( - f'
  • {p.relative_to(dst).as_posix()}
  • ' + f'
  • ' + f'{p.relative_to(dst).as_posix()}
  • ' for p in sorted(dst.rglob("*.html")) if p.name != "index.html" ) -(dst / "index.html").write_text(f"

    Buddy-Benchmark results

      {links}
    ") +(dst / "index.html").write_text(CSS + f"

    Buddy-Benchmark results

      \n{links}\n
    ") diff --git a/site/deeplearning/build_results_crosscompile_summary.html b/site/deeplearning/build_results_crosscompile_summary.html index e45f5d17..97716c87 100644 --- a/site/deeplearning/build_results_crosscompile_summary.html +++ b/site/deeplearning/build_results_crosscompile_summary.html @@ -1,4 +1,5 @@ -

    deeplearning/build_results_crosscompile_summary.log

    2025-05-26 19:08:59 UTC

    [Failed]  Build of 'dl-model-tinyllama-benchmark'
    +

    deeplearning/build_results_crosscompile_summary.log

    2025-05-26 19:43:20 UTC

    +
    [Failed]  Build of 'dl-model-tinyllama-benchmark'
     [Failed]  Build of 'dl-model-mobilenetv3-benchmark'
     [Success] Build of 'dl-model-lenet-benchmark'
     [Failed]  Build of 'dl-model-bert-benchmark'
    diff --git a/site/deeplearning/build_results_summary.html b/site/deeplearning/build_results_summary.html
    index fc695e24..f308ef5a 100644
    --- a/site/deeplearning/build_results_summary.html
    +++ b/site/deeplearning/build_results_summary.html
    @@ -1,4 +1,5 @@
    -

    deeplearning/build_results_summary.log

    2025-05-26 19:08:59 UTC

    [Success] Build of 'dl-model-tinyllama-benchmark'
    +

    deeplearning/build_results_summary.log

    2025-05-26 19:43:20 UTC

    +
    [Success] Build of 'dl-model-tinyllama-benchmark'
     [Success] Build of 'dl-model-mobilenetv3-benchmark'
     [Success] Build of 'dl-model-lenet-benchmark'
     [Failed]  Build of 'dl-model-bert-benchmark'
    diff --git a/site/deeplearning/dl-layer-ffn-benchmark.html b/site/deeplearning/dl-layer-ffn-benchmark.html
    index 98364083..6174c0ab 100644
    --- a/site/deeplearning/dl-layer-ffn-benchmark.html
    +++ b/site/deeplearning/dl-layer-ffn-benchmark.html
    @@ -1,4 +1,5 @@
    -

    deeplearning/dl-layer-ffn-benchmark.log

    2025-05-26 19:08:59 UTC

    2025-05-25T16:33:30+00:00
    +

    deeplearning/dl-layer-ffn-benchmark.log

    2025-05-26 19:43:20 UTC

    +
    2025-05-25T16:33:30+00:00
     Running ./dl-layer-ffn-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-layer-rmsnorm-benchmark.html b/site/deeplearning/dl-layer-rmsnorm-benchmark.html
    index e99464ce..c795a127 100644
    --- a/site/deeplearning/dl-layer-rmsnorm-benchmark.html
    +++ b/site/deeplearning/dl-layer-rmsnorm-benchmark.html
    @@ -1,4 +1,5 @@
    -

    deeplearning/dl-layer-rmsnorm-benchmark.log

    2025-05-26 19:08:59 UTC

    2025-05-25T16:33:34+00:00
    +

    deeplearning/dl-layer-rmsnorm-benchmark.log

    2025-05-26 19:43:20 UTC

    +
    2025-05-25T16:33:34+00:00
     Running ./dl-layer-rmsnorm-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-layer-selfattention-benchmark.html b/site/deeplearning/dl-layer-selfattention-benchmark.html
    index a7abafc0..5f475c02 100644
    --- a/site/deeplearning/dl-layer-selfattention-benchmark.html
    +++ b/site/deeplearning/dl-layer-selfattention-benchmark.html
    @@ -1,4 +1,5 @@
    -

    deeplearning/dl-layer-selfattention-benchmark.log

    2025-05-26 19:08:59 UTC

    2025-05-25T16:33:32+00:00
    +

    deeplearning/dl-layer-selfattention-benchmark.log

    2025-05-26 19:43:20 UTC

    +
    2025-05-25T16:33:32+00:00
     Running ./dl-layer-selfattention-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-model-lenet-benchmark.html b/site/deeplearning/dl-model-lenet-benchmark.html
    index 872705a2..e2683c3b 100644
    --- a/site/deeplearning/dl-model-lenet-benchmark.html
    +++ b/site/deeplearning/dl-model-lenet-benchmark.html
    @@ -1,4 +1,5 @@
    -

    deeplearning/dl-model-lenet-benchmark.log

    2025-05-26 19:08:59 UTC

    2025-05-25T16:29:36+00:00
    +

    deeplearning/dl-model-lenet-benchmark.log

    2025-05-26 19:43:20 UTC

    +
    2025-05-25T16:29:36+00:00
     Running ./dl-model-lenet-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-model-mobilenetv3-benchmark.html b/site/deeplearning/dl-model-mobilenetv3-benchmark.html
    index bc2fba6e..40a7bdaa 100644
    --- a/site/deeplearning/dl-model-mobilenetv3-benchmark.html
    +++ b/site/deeplearning/dl-model-mobilenetv3-benchmark.html
    @@ -1,4 +1,5 @@
    -

    deeplearning/dl-model-mobilenetv3-benchmark.log

    2025-05-26 19:08:59 UTC

    2025-05-25T16:29:34+00:00
    +

    deeplearning/dl-model-mobilenetv3-benchmark.log

    2025-05-26 19:43:20 UTC

    +
    2025-05-25T16:29:34+00:00
     Running ./dl-model-mobilenetv3-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-model-resnet18-benchmark.html b/site/deeplearning/dl-model-resnet18-benchmark.html
    index 9aaf6fe6..287feae8 100644
    --- a/site/deeplearning/dl-model-resnet18-benchmark.html
    +++ b/site/deeplearning/dl-model-resnet18-benchmark.html
    @@ -1,4 +1,5 @@
    -

    deeplearning/dl-model-resnet18-benchmark.log

    2025-05-26 19:08:59 UTC

    2025-05-25T16:33:27+00:00
    +

    deeplearning/dl-model-resnet18-benchmark.log

    2025-05-26 19:43:20 UTC

    +
    2025-05-25T16:33:27+00:00
     Running ./dl-model-resnet18-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-model-tinyllama-benchmark.html b/site/deeplearning/dl-model-tinyllama-benchmark.html
    index b6de06b3..76bbaa71 100644
    --- a/site/deeplearning/dl-model-tinyllama-benchmark.html
    +++ b/site/deeplearning/dl-model-tinyllama-benchmark.html
    @@ -1,4 +1,5 @@
    -

    deeplearning/dl-model-tinyllama-benchmark.log

    2025-05-26 19:08:59 UTC

    2025-05-25T16:24:01+00:00
    +

    deeplearning/dl-model-tinyllama-benchmark.log

    2025-05-26 19:43:20 UTC

    +
    2025-05-25T16:24:01+00:00
     Running ./dl-model-tinyllama-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-model-whisper-benchmark.html b/site/deeplearning/dl-model-whisper-benchmark.html
    index c691b8f8..5bdee31b 100644
    --- a/site/deeplearning/dl-model-whisper-benchmark.html
    +++ b/site/deeplearning/dl-model-whisper-benchmark.html
    @@ -1,4 +1,5 @@
    -

    deeplearning/dl-model-whisper-benchmark.log

    2025-05-26 19:08:59 UTC

    2025-05-25T16:29:38+00:00
    +

    deeplearning/dl-model-whisper-benchmark.log

    2025-05-26 19:43:20 UTC

    +
    2025-05-25T16:29:38+00:00
     Running ./dl-model-whisper-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html b/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html
    index bf5ce9c2..3c85d562 100644
    --- a/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html
    +++ b/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html
    @@ -1,4 +1,5 @@
    -

    deeplearning/dl-op-linalg-arithaddf-benchmark.log

    2025-05-26 19:08:59 UTC

    2025-05-25T16:34:03+00:00
    +

    deeplearning/dl-op-linalg-arithaddf-benchmark.log

    2025-05-26 19:43:20 UTC

    +
    2025-05-25T16:34:03+00:00
     Running ./dl-op-linalg-arithaddf-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html b/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html
    index f7020ba6..d178a168 100644
    --- a/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html
    +++ b/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html
    @@ -1,4 +1,5 @@
    -

    deeplearning/dl-op-linalg-arithdivf-benchmark.log

    2025-05-26 19:08:59 UTC

    2025-05-25T16:34:05+00:00
    +

    deeplearning/dl-op-linalg-arithdivf-benchmark.log

    2025-05-26 19:43:20 UTC

    +
    2025-05-25T16:34:05+00:00
     Running ./dl-op-linalg-arithdivf-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html b/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html
    index 941d1b4f..fc5d88bd 100644
    --- a/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html
    +++ b/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html
    @@ -1,4 +1,5 @@
    -

    deeplearning/dl-op-linalg-arithmulf-benchmark.log

    2025-05-26 19:08:59 UTC

    2025-05-25T16:34:07+00:00
    +

    deeplearning/dl-op-linalg-arithmulf-benchmark.log

    2025-05-26 19:43:20 UTC

    +
    2025-05-25T16:34:07+00:00
     Running ./dl-op-linalg-arithmulf-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html b/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html
    index b29cfae1..a1b32f7e 100644
    --- a/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html
    +++ b/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html
    @@ -1,4 +1,5 @@
    -

    deeplearning/dl-op-linalg-arithnegf-benchmark.log

    2025-05-26 19:08:59 UTC

    2025-05-25T16:34:09+00:00
    +

    deeplearning/dl-op-linalg-arithnegf-benchmark.log

    2025-05-26 19:43:20 UTC

    +
    2025-05-25T16:34:09+00:00
     Running ./dl-op-linalg-arithnegf-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html b/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html
    index 8b1fa53f..cc10208c 100644
    --- a/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html
    +++ b/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html
    @@ -1,4 +1,5 @@
    -

    deeplearning/dl-op-linalg-arithsubf-benchmark.log

    2025-05-26 19:08:59 UTC

    2025-05-25T16:34:11+00:00
    +

    deeplearning/dl-op-linalg-arithsubf-benchmark.log

    2025-05-26 19:43:20 UTC

    +
    2025-05-25T16:34:11+00:00
     Running ./dl-op-linalg-arithsubf-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html b/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html
    index b40b1946..6766d126 100644
    --- a/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html
    +++ b/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html
    @@ -1,4 +1,5 @@
    -

    deeplearning/dl-op-linalg-batch-matmul-benchmark.log

    2025-05-26 19:08:59 UTC

    2025-05-25T16:33:53+00:00
    +

    deeplearning/dl-op-linalg-batch-matmul-benchmark.log

    2025-05-26 19:43:20 UTC

    +
    2025-05-25T16:33:53+00:00
     Running ./dl-op-linalg-batch-matmul-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html b/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html
    index 77af63d5..fa3568cd 100644
    --- a/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html
    +++ b/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html
    @@ -1,4 +1,5 @@
    -

    deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.log

    2025-05-26 19:08:59 UTC

    2025-05-25T16:33:46+00:00
    +

    deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.log

    2025-05-26 19:43:20 UTC

    +
    2025-05-25T16:33:46+00:00
     Running ./dl-op-linalg-conv2d-nchw-fchw-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html b/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html
    index 3bed0222..c8467076 100644
    --- a/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html
    +++ b/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html
    @@ -1,4 +1,5 @@
    -

    deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.log

    2025-05-26 19:08:59 UTC

    2025-05-25T16:33:50+00:00
    +

    deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.log

    2025-05-26 19:43:20 UTC

    +
    2025-05-25T16:33:50+00:00
     Running ./dl-op-linalg-conv2d-nhwc-fhwc-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html b/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html
    index 8a27412a..5f6f41f0 100644
    --- a/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html
    +++ b/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html
    @@ -1,4 +1,5 @@
    -

    deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.log

    2025-05-26 19:08:59 UTC

    2025-05-25T16:33:48+00:00
    +

    deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.log

    2025-05-26 19:43:20 UTC

    +
    2025-05-25T16:33:48+00:00
     Running ./dl-op-linalg-conv2d-nhwc-hwcf-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html b/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html
    index 5d4dc2f6..78d50376 100644
    --- a/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html
    +++ b/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html
    @@ -1,4 +1,5 @@
    -

    deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.log

    2025-05-26 19:08:59 UTC

    2025-05-25T16:33:50+00:00
    +

    deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.log

    2025-05-26 19:43:20 UTC

    +
    2025-05-25T16:33:50+00:00
     Running ./dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-op-linalg-mathexp-benchmark.html b/site/deeplearning/dl-op-linalg-mathexp-benchmark.html
    index 89c239e9..5d016d97 100644
    --- a/site/deeplearning/dl-op-linalg-mathexp-benchmark.html
    +++ b/site/deeplearning/dl-op-linalg-mathexp-benchmark.html
    @@ -1,4 +1,5 @@
    -

    deeplearning/dl-op-linalg-mathexp-benchmark.log

    2025-05-26 19:08:59 UTC

    2025-05-25T16:34:17+00:00
    +

    deeplearning/dl-op-linalg-mathexp-benchmark.log

    2025-05-26 19:43:20 UTC

    +
    2025-05-25T16:34:17+00:00
     Running ./dl-op-linalg-mathexp-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html b/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html
    index 5b5ec0c5..1ca87cea 100644
    --- a/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html
    +++ b/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html
    @@ -1,4 +1,5 @@
    -

    deeplearning/dl-op-linalg-mathfpow-benchmark.log

    2025-05-26 19:08:59 UTC

    2025-05-25T16:34:13+00:00
    +

    deeplearning/dl-op-linalg-mathfpow-benchmark.log

    2025-05-26 19:43:20 UTC

    +
    2025-05-25T16:34:13+00:00
     Running ./dl-op-linalg-mathfpow-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html b/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html
    index dedee70f..fe054df3 100644
    --- a/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html
    +++ b/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html
    @@ -1,4 +1,5 @@
    -

    deeplearning/dl-op-linalg-mathrsqrt-benchmark.log

    2025-05-26 19:08:59 UTC

    2025-05-25T16:34:15+00:00
    +

    deeplearning/dl-op-linalg-mathrsqrt-benchmark.log

    2025-05-26 19:43:20 UTC

    +
    2025-05-25T16:34:15+00:00
     Running ./dl-op-linalg-mathrsqrt-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-op-linalg-matmul-benchmark.html b/site/deeplearning/dl-op-linalg-matmul-benchmark.html
    index 72d51506..f99b9067 100644
    --- a/site/deeplearning/dl-op-linalg-matmul-benchmark.html
    +++ b/site/deeplearning/dl-op-linalg-matmul-benchmark.html
    @@ -1,4 +1,5 @@
    -

    deeplearning/dl-op-linalg-matmul-benchmark.log

    2025-05-26 19:08:59 UTC

    2025-05-25T16:33:36+00:00
    +

    deeplearning/dl-op-linalg-matmul-benchmark.log

    2025-05-26 19:43:20 UTC

    +
    2025-05-25T16:33:36+00:00
     Running ./dl-op-linalg-matmul-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html b/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html
    index 48049bd1..c7b688df 100644
    --- a/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html
    +++ b/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html
    @@ -1,4 +1,5 @@
    -

    deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.log

    2025-05-26 19:08:59 UTC

    2025-05-25T16:33:51+00:00
    +

    deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.log

    2025-05-26 19:43:20 UTC

    +
    2025-05-25T16:33:51+00:00
     Running ./dl-op-linalg-pooling-nhwc-sum-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html b/site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html
    index 68a441c2..3730cdfc 100644
    --- a/site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html
    +++ b/site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html
    @@ -1,4 +1,5 @@
    -

    deeplearning/dl-op-linalg-reduceaddf-benchmark.log

    2025-05-26 19:08:59 UTC

    2025-05-25T16:34:19+00:00
    +

    deeplearning/dl-op-linalg-reduceaddf-benchmark.log

    2025-05-26 19:43:20 UTC

    +
    2025-05-25T16:34:19+00:00
     Running ./dl-op-linalg-reduceaddf-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html b/site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html
    index b8d684b1..e667e8a6 100644
    --- a/site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html
    +++ b/site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html
    @@ -1,4 +1,5 @@
    -

    deeplearning/dl-op-linalg-reducemaxf-benchmark.log

    2025-05-26 19:08:59 UTC

    2025-05-25T16:34:19+00:00
    +

    deeplearning/dl-op-linalg-reducemaxf-benchmark.log

    2025-05-26 19:43:20 UTC

    +
    2025-05-25T16:34:19+00:00
     Running ./dl-op-linalg-reducemaxf-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html b/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html
    index 48827bd4..2244ab45 100644
    --- a/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html
    +++ b/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html
    @@ -1,4 +1,5 @@
    -

    deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.log

    2025-05-26 19:08:59 UTC

    2025-05-25T16:34:19+00:00
    +

    deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.log

    2025-05-26 19:43:20 UTC

    +
    2025-05-25T16:34:19+00:00
     Running ./dl-op-linalg-softmax-exp-sum-div-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html b/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html
    index d179f971..9c5ce5a0 100644
    --- a/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html
    +++ b/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html
    @@ -1,4 +1,5 @@
    -

    deeplearning/dl-op-matmul-transpose-b-benchmark.log

    2025-05-26 19:08:59 UTC

    2025-05-25T16:34:22+00:00
    +

    deeplearning/dl-op-matmul-transpose-b-benchmark.log

    2025-05-26 19:43:20 UTC

    +
    2025-05-25T16:34:22+00:00
     Running ./dl-op-matmul-transpose-b-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/dl-op-tosa-transpose-benchmark.html b/site/deeplearning/dl-op-tosa-transpose-benchmark.html
    index 61decf7a..f8d27bda 100644
    --- a/site/deeplearning/dl-op-tosa-transpose-benchmark.html
    +++ b/site/deeplearning/dl-op-tosa-transpose-benchmark.html
    @@ -1,4 +1,5 @@
    -

    deeplearning/dl-op-tosa-transpose-benchmark.log

    2025-05-26 19:08:59 UTC

    2025-05-25T16:34:22+00:00
    +

    deeplearning/dl-op-tosa-transpose-benchmark.log

    2025-05-26 19:43:20 UTC

    +
    2025-05-25T16:34:22+00:00
     Running ./dl-op-tosa-transpose-benchmark
     Run on (24 X 5100 MHz CPU s)
     CPU Caches:
    diff --git a/site/deeplearning/run_results_summary.html b/site/deeplearning/run_results_summary.html
    index 38230663..bb376261 100644
    --- a/site/deeplearning/run_results_summary.html
    +++ b/site/deeplearning/run_results_summary.html
    @@ -1,4 +1,5 @@
    -

    deeplearning/run_results_summary.log

    2025-05-26 19:08:59 UTC

    [Success] Run of 'dl-model-tinyllama-benchmark'
    +

    deeplearning/run_results_summary.log

    2025-05-26 19:43:20 UTC

    +
    [Success] Run of 'dl-model-tinyllama-benchmark'
     [Success] Run of 'dl-model-mobilenetv3-benchmark'
     [Success] Run of 'dl-model-lenet-benchmark'
     [Missing] Executable not found for 'dl-model-bert-benchmark'
    diff --git a/site/geminiprocessing/build.html b/site/geminiprocessing/build.html
    index a87bce5e..7ef60cd9 100644
    --- a/site/geminiprocessing/build.html
    +++ b/site/geminiprocessing/build.html
    @@ -1,4 +1,5 @@
    -

    geminiprocessing/build.log

    2025-05-26 19:08:59 UTC

    [1/21] Creating directories for 'project_googlebenchmark'
    +

    geminiprocessing/build.log

    2025-05-26 19:43:20 UTC

    +
    [1/21] Creating directories for 'project_googlebenchmark'
     [2/21] Building C object benchmarks/Gemmini/Ops/MatMulOp/CMakeFiles/ExoMatMul.dir/ExoMatmul.c.o
     FAILED: benchmarks/Gemmini/Ops/MatMulOp/CMakeFiles/ExoMatMul.dir/ExoMatmul.c.o 
     riscv64-unknown-linux-gnu-gcc  -I/home/buddy-complier-workspace/buddy-mlir/build/cmake/../../frontend/Interfaces -I/home/buddy-complier-workspace/buddy-mlir/build/cmake/../../thirdparty/include -I/home/buddy-complier-workspace/buddy-benchmark/benchmarks -I/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include -I/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/.. -I/home/xychen/buddy-mlir/frontend/Interfaces -O3 -DNDEBUG -MD -MT benchmarks/Gemmini/Ops/MatMulOp/CMakeFiles/ExoMatMul.dir/ExoMatmul.c.o -MF benchmarks/Gemmini/Ops/MatMulOp/CMakeFiles/ExoMatMul.dir/ExoMatmul.c.o.d -o benchmarks/Gemmini/Ops/MatMulOp/CMakeFiles/ExoMatMul.dir/ExoMatmul.c.o -c /home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c
    diff --git a/site/geminiprocessing/cmake_configure.html b/site/geminiprocessing/cmake_configure.html
    index 0326dc85..f3db3469 100644
    --- a/site/geminiprocessing/cmake_configure.html
    +++ b/site/geminiprocessing/cmake_configure.html
    @@ -1,4 +1,5 @@
    -

    geminiprocessing/cmake_configure.log

    2025-05-26 19:08:59 UTC

    -- The CXX compiler identification is GNU 9.2.0
    +

    geminiprocessing/cmake_configure.log

    2025-05-26 19:43:20 UTC

    +
    -- The CXX compiler identification is GNU 9.2.0
     -- The C compiler identification is GNU 9.2.0
     -- Detecting CXX compiler ABI info
     -- Detecting CXX compiler ABI info - done
    diff --git a/site/index.html b/site/index.html
    index fa9d1d36..0c340a0c 100644
    --- a/site/index.html
    +++ b/site/index.html
    @@ -1,5 +1,4 @@
    -

    Buddy-Benchmark results

      -
    • deeplearning/build_results_crosscompile_summary.html
    • +

      Buddy-Benchmark results

      \ No newline at end of file diff --git a/site/vectorization/vectorization_result.html b/site/vectorization/vectorization_result.html index 52baeca9..892d586b 100644 --- a/site/vectorization/vectorization_result.html +++ b/site/vectorization/vectorization_result.html @@ -1,4 +1,5 @@ -

      vectorization/vectorization_result.log

      2025-05-26 19:08:59 UTC

      Vectorization Benchmark - Mon May 26 19:08:49 UTC 2025
      +

      vectorization/vectorization_result.log

      2025-05-26 19:43:20 UTC

      +
      Vectorization Benchmark - Mon May 26 19:43:10 UTC 2025
       [Info] Starting vectorization-matrix-benchmark build...
       [Info] Running CMake configuration...
       -- The CXX compiler identification is GNU 11.4.0
      @@ -128,20 +129,20 @@ 

      vectorization/vectorization_result.log

      2025-05-26 19:08:59 UTC

      vectorization/vectorization_result.log

      2025-05-26 19:08:59 UTC

      vectorization/vectorization_result.log

      2025-05-26 19:08:59 UTC

      Date: Mon, 26 May 2025 22:08:41 +0200 Subject: [PATCH 16/52] ci: run benchmarks inside reusable docker container --- .github/workflows/bench.yml | 1 + site/index.html | 17 +++++++- site/vectorization/vectorization_matrix.html | 16 ++++++++ .../vectorization/vectorization_matrix.json | 16 ++++---- .../vectorization/vectorization_result.log | 40 +++++++++---------- 5 files changed, 60 insertions(+), 30 deletions(-) create mode 100644 site/vectorization/vectorization_matrix.html diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml index 5e002603..c8105b08 100755 --- a/.github/workflows/bench.yml +++ b/.github/workflows/bench.yml @@ -48,6 +48,7 @@ jobs: - name: Build mini-site working-directory: /home/quliu/buddy-complier-workspace/buddy-benchmark run: | + rm -rf site python3 scripts/logs2html.py test_result site - name: Upload site artifact diff --git a/site/index.html b/site/index.html index 0c340a0c..cf31ad92 100644 --- a/site/index.html +++ b/site/index.html @@ -1,4 +1,15 @@ -

      Buddy-Benchmark results

      • deeplearning/build_results_crosscompile_summary.html
      • + + +

        Buddy-Benchmark results

        \ No newline at end of file diff --git a/site/vectorization/vectorization_matrix.html b/site/vectorization/vectorization_matrix.html new file mode 100644 index 00000000..6d59b485 --- /dev/null +++ b/site/vectorization/vectorization_matrix.html @@ -0,0 +1,16 @@ + + + +

        vectorization/vectorization_matrix.json

        2025-05-26 20:00:08 UTC

        +

        vectorization_matrix.json

        + + +
        NameTime (ns)CPU (ns)Iterations
        MLIR_MatMul/119.119.136916854
        MLIR_MatVec/120.820.833686586
        \ No newline at end of file diff --git a/test_result/vectorization/vectorization_matrix.json b/test_result/vectorization/vectorization_matrix.json index 4bffa0af..0c3e59ca 100644 --- a/test_result/vectorization/vectorization_matrix.json +++ b/test_result/vectorization/vectorization_matrix.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-05-26T19:43:17+00:00", + "date": "2025-05-26T20:00:05+00:00", "host_name": "4ed4bacfe45d", "executable": "./vectorization-matrix-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [23.3345,16.9849,8.30957], + "load_avg": [26.4492,18.2622,10.6772], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 36112317, - "real_time": 1.9326995663730528e+01, - "cpu_time": 1.9326281584202977e+01, + "iterations": 36916854, + "real_time": 1.9063199384214371e+01, + "cpu_time": 1.9060853641537282e+01, "time_unit": "ns" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 33468392, - "real_time": 2.1032179513922156e+01, - "cpu_time": 2.1031918205093334e+01, + "iterations": 33686586, + "real_time": 2.0769743381568315e+01, + "cpu_time": 2.0767210010536537e+01, "time_unit": "ns" } ] diff --git a/test_result/vectorization/vectorization_result.log b/test_result/vectorization/vectorization_result.log index ec28e94d..8a4c65bb 100755 --- a/test_result/vectorization/vectorization_result.log +++ b/test_result/vectorization/vectorization_result.log @@ -1,4 +1,4 @@ -Vectorization Benchmark - Mon May 26 19:43:10 UTC 2025 +Vectorization Benchmark - Mon May 26 19:59:59 UTC 2025 [Info] Starting vectorization-matrix-benchmark build... [Info] Running CMake configuration... -- The CXX compiler identification is GNU 11.4.0 @@ -39,8 +39,8 @@ Vectorization Benchmark - Mon May 26 19:43:10 UTC 2025 [Info] Building vectorization-matrix-benchmark... 
[1/17] Generating mlir-matmul.o [2/17] Generating mlir-matvec.o -[3/17] Linking CXX static library benchmarks/Vectorization/libMLIRMatMul.a -[4/17] Linking CXX static library benchmarks/Vectorization/libMLIRMatVec.a +[3/17] Linking CXX static library benchmarks/Vectorization/libMLIRMatVec.a +[4/17] Linking CXX static library benchmarks/Vectorization/libMLIRMatMul.a [5/17] Creating directories for 'project_googlebenchmark' [6/17] Performing download step (git clone) for 'project_googlebenchmark' Cloning into 'project_googlebenchmark'... @@ -128,22 +128,22 @@ Call Stack (most recent call first): -- Generating done -- Build files have been written to: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/src/project_googlebenchmark-build [10/17] Performing build step for 'project_googlebenchmark' -[1/22] Building CXX object src/CMakeFiles/benchmark.dir/sleep.cc.o +[1/22] Building CXX object src/CMakeFiles/benchmark.dir/colorprint.cc.o [2/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark_name.cc.o [3/22] Building CXX object src/CMakeFiles/benchmark_main.dir/benchmark_main.cc.o -[4/22] Building CXX object src/CMakeFiles/benchmark.dir/counter.cc.o -[5/22] Building CXX object src/CMakeFiles/benchmark.dir/colorprint.cc.o -[6/22] Building CXX object src/CMakeFiles/benchmark.dir/string_util.cc.o +[4/22] Building CXX object src/CMakeFiles/benchmark.dir/sleep.cc.o +[5/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark_api_internal.cc.o +[6/22] Building CXX object src/CMakeFiles/benchmark.dir/counter.cc.o [7/22] Building CXX object src/CMakeFiles/benchmark.dir/perf_counters.cc.o -[8/22] Building CXX object src/CMakeFiles/benchmark.dir/reporter.cc.o -[9/22] Building CXX object src/CMakeFiles/benchmark.dir/timers.cc.o -[10/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark_api_internal.cc.o -[11/22] Building CXX object src/CMakeFiles/benchmark.dir/commandlineflags.cc.o -[12/22] Building CXX object 
src/CMakeFiles/benchmark.dir/console_reporter.cc.o -[13/22] Building CXX object src/CMakeFiles/benchmark.dir/json_reporter.cc.o -[14/22] Building CXX object src/CMakeFiles/benchmark.dir/csv_reporter.cc.o -[15/22] Building CXX object src/CMakeFiles/benchmark.dir/sysinfo.cc.o -[16/22] Building CXX object src/CMakeFiles/benchmark.dir/complexity.cc.o +[8/22] Building CXX object src/CMakeFiles/benchmark.dir/timers.cc.o +[9/22] Building CXX object src/CMakeFiles/benchmark.dir/reporter.cc.o +[10/22] Building CXX object src/CMakeFiles/benchmark.dir/commandlineflags.cc.o +[11/22] Building CXX object src/CMakeFiles/benchmark.dir/complexity.cc.o +[12/22] Building CXX object src/CMakeFiles/benchmark.dir/string_util.cc.o +[13/22] Building CXX object src/CMakeFiles/benchmark.dir/csv_reporter.cc.o +[14/22] Building CXX object src/CMakeFiles/benchmark.dir/console_reporter.cc.o +[15/22] Building CXX object src/CMakeFiles/benchmark.dir/json_reporter.cc.o +[16/22] Building CXX object src/CMakeFiles/benchmark.dir/sysinfo.cc.o [17/22] Building CXX object src/CMakeFiles/benchmark.dir/statistics.cc.o [18/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark_runner.cc.o [19/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark.cc.o @@ -180,7 +180,7 @@ Call Stack (most recent call first): [16/17] Building CXX object benchmarks/Vectorization/CMakeFiles/vectorization-matrix-benchmark.dir/MLIRMatVecBenchmark.cpp.o [17/17] Linking CXX executable bin/vectorization-matrix-benchmark [Info] Running vectorization-matrix-benchmark... -2025-05-26T19:43:17+00:00 +2025-05-26T20:00:05+00:00 Running ./vectorization-matrix-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -188,13 +188,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 23.33, 16.98, 8.31 +Load Average: 26.45, 18.26, 10.68 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
-------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------- -MLIR_MatMul/1 19.3 ns 19.3 ns 36112317 -MLIR_MatVec/1 21.0 ns 21.0 ns 33468392 +MLIR_MatMul/1 19.1 ns 19.1 ns 36916854 +MLIR_MatVec/1 20.8 ns 20.8 ns 33686586 -------------------------------------------------------- MLIR_MatMul: MLIR MatMul Operation + Nested Loop [ 18 18 18 18 18 18 18 18 18 18 ] From ec3040dc68397023136e1b27094dde6d91a6453d Mon Sep 17 00:00:00 2001 From: Q Liu <52538137+LIUQyou@users.noreply.github.com> Date: Mon, 26 May 2025 23:26:38 +0200 Subject: [PATCH 17/52] git push --- .../build_results_crosscompile_summary.html | 31 - site/deeplearning/build_results_summary.html | 31 - site/deeplearning/dl-layer-ffn-benchmark.html | 20 - .../dl-layer-rmsnorm-benchmark.html | 20 - .../dl-layer-selfattention-benchmark.html | 20 - .../dl-model-lenet-benchmark.html | 21 - .../dl-model-mobilenetv3-benchmark.html | 21 - .../dl-model-resnet18-benchmark.html | 20 - .../dl-model-tinyllama-benchmark.html | 21 - .../dl-model-whisper-benchmark.html | 21 - .../dl-op-linalg-arithaddf-benchmark.html | 21 - .../dl-op-linalg-arithdivf-benchmark.html | 21 - .../dl-op-linalg-arithmulf-benchmark.html | 21 - .../dl-op-linalg-arithnegf-benchmark.html | 21 - .../dl-op-linalg-arithsubf-benchmark.html | 21 - .../dl-op-linalg-batch-matmul-benchmark.html | 27 - ...-op-linalg-conv2d-nchw-fchw-benchmark.html | 21 - ...-op-linalg-conv2d-nhwc-fhwc-benchmark.html | 23 - ...-op-linalg-conv2d-nhwc-hwcf-benchmark.html | 21 - ...-depthwise-conv-2d-nhwc-hwc-benchmark.html | 21 - .../dl-op-linalg-mathexp-benchmark.html | 21 - .../dl-op-linalg-mathfpow-benchmark.html | 21 - .../dl-op-linalg-mathrsqrt-benchmark.html | 21 - .../dl-op-linalg-matmul-benchmark.html | 24 - ...-op-linalg-pooling-nhwc-sum-benchmark.html | 21 - .../dl-op-linalg-reduceaddf-benchmark.html | 12 - .../dl-op-linalg-reducemaxf-benchmark.html | 12 - 
...-linalg-softmax-exp-sum-div-benchmark.html | 21 - .../dl-op-matmul-transpose-b-benchmark.html | 23 - .../dl-op-tosa-transpose-benchmark.html | 19 - site/deeplearning/run_results_summary.html | 31 - site/geminiprocessing/build.html | 657 ------------------ site/geminiprocessing/cmake_configure.html | 39 -- site/index.html | 34 - site/vectorization/vectorization_matrix.html | 6 +- site/vectorization/vectorization_result.html | 205 ------ test/test_script_deeplearning.sh | 2 +- .../build_results_crosscompile_summary.log | 0 .../deeplearning/build_results_summary.log | 0 .../deeplearning/dl-layer-ffn-benchmark.json | 68 ++ .../deeplearning/dl-layer-ffn-benchmark.log | 8 +- .../dl-layer-rmsnorm-benchmark.json | 68 ++ .../dl-layer-rmsnorm-benchmark.log | 8 +- .../dl-layer-selfattention-benchmark.json | 68 ++ .../dl-layer-selfattention-benchmark.log | 8 +- .../dl-model-lenet-benchmark.json | 68 ++ .../deeplearning/dl-model-lenet-benchmark.log | 8 +- .../dl-model-mobilenetv3-benchmark.json | 68 ++ .../dl-model-mobilenetv3-benchmark.log | 8 +- .../dl-model-resnet18-benchmark.json | 68 ++ .../dl-model-resnet18-benchmark.log | 8 +- .../dl-model-tinyllama-benchmark.json | 82 +++ .../dl-model-tinyllama-benchmark.log | 10 +- .../dl-model-whisper-benchmark.json | 68 ++ .../dl-model-whisper-benchmark.log | 8 +- .../dl-op-linalg-arithaddf-benchmark.json | 68 ++ .../dl-op-linalg-arithaddf-benchmark.log | 8 +- .../dl-op-linalg-arithdivf-benchmark.json | 68 ++ .../dl-op-linalg-arithdivf-benchmark.log | 8 +- .../dl-op-linalg-arithmulf-benchmark.json | 68 ++ .../dl-op-linalg-arithmulf-benchmark.log | 8 +- .../dl-op-linalg-arithnegf-benchmark.json | 68 ++ .../dl-op-linalg-arithnegf-benchmark.log | 8 +- .../dl-op-linalg-arithsubf-benchmark.json | 68 ++ .../dl-op-linalg-arithsubf-benchmark.log | 8 +- .../dl-op-linalg-batch-matmul-benchmark.json | 138 ++++ .../dl-op-linalg-batch-matmul-benchmark.log | 16 +- ...-op-linalg-conv2d-nchw-fchw-benchmark.json | 68 ++ 
...l-op-linalg-conv2d-nchw-fchw-benchmark.log | 6 +- ...-op-linalg-conv2d-nhwc-fhwc-benchmark.json | 96 +++ ...l-op-linalg-conv2d-nhwc-fhwc-benchmark.log | 6 +- ...-op-linalg-conv2d-nhwc-hwcf-benchmark.json | 68 ++ ...l-op-linalg-conv2d-nhwc-hwcf-benchmark.log | 8 +- ...-depthwise-conv-2d-nhwc-hwc-benchmark.json | 82 +++ ...g-depthwise-conv-2d-nhwc-hwc-benchmark.log | 8 +- .../dl-op-linalg-mathexp-benchmark.json | 68 ++ .../dl-op-linalg-mathexp-benchmark.log | 8 +- .../dl-op-linalg-mathfpow-benchmark.json | 68 ++ .../dl-op-linalg-mathfpow-benchmark.log | 8 +- .../dl-op-linalg-mathrsqrt-benchmark.json | 68 ++ .../dl-op-linalg-mathrsqrt-benchmark.log | 8 +- .../dl-op-linalg-matmul-benchmark.json | 110 +++ .../dl-op-linalg-matmul-benchmark.log | 14 +- ...-op-linalg-pooling-nhwc-sum-benchmark.json | 68 ++ ...l-op-linalg-pooling-nhwc-sum-benchmark.log | 8 +- .../dl-op-linalg-reduceaddf-benchmark.json | 38 + .../dl-op-linalg-reduceaddf-benchmark.log | 4 +- .../dl-op-linalg-reducemaxf-benchmark.json | 38 + .../dl-op-linalg-reducemaxf-benchmark.log | 4 +- ...-linalg-softmax-exp-sum-div-benchmark.json | 68 ++ ...p-linalg-softmax-exp-sum-div-benchmark.log | 8 +- .../dl-op-matmul-transpose-b-benchmark.json | 96 +++ .../dl-op-matmul-transpose-b-benchmark.log | 12 +- .../dl-op-tosa-transpose-benchmark.json | 68 ++ .../dl-op-tosa-transpose-benchmark.log | 8 +- .../deeplearning/run_results_summary.log | 54 ++ test_result/geminiprocessing/build.log | 655 ----------------- .../geminiprocessing/cmake_configure.log | 37 - .../image-processing-result.log | 179 +++++ .../vectorization/vectorization_matrix.json | 16 +- .../vectorization/vectorization_result.log | 36 +- 101 files changed, 2419 insertions(+), 2443 deletions(-) delete mode 100644 site/deeplearning/build_results_crosscompile_summary.html delete mode 100644 site/deeplearning/build_results_summary.html delete mode 100644 site/deeplearning/dl-layer-ffn-benchmark.html delete mode 100644 
site/deeplearning/dl-layer-rmsnorm-benchmark.html delete mode 100644 site/deeplearning/dl-layer-selfattention-benchmark.html delete mode 100644 site/deeplearning/dl-model-lenet-benchmark.html delete mode 100644 site/deeplearning/dl-model-mobilenetv3-benchmark.html delete mode 100644 site/deeplearning/dl-model-resnet18-benchmark.html delete mode 100644 site/deeplearning/dl-model-tinyllama-benchmark.html delete mode 100644 site/deeplearning/dl-model-whisper-benchmark.html delete mode 100644 site/deeplearning/dl-op-linalg-arithaddf-benchmark.html delete mode 100644 site/deeplearning/dl-op-linalg-arithdivf-benchmark.html delete mode 100644 site/deeplearning/dl-op-linalg-arithmulf-benchmark.html delete mode 100644 site/deeplearning/dl-op-linalg-arithnegf-benchmark.html delete mode 100644 site/deeplearning/dl-op-linalg-arithsubf-benchmark.html delete mode 100644 site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html delete mode 100644 site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html delete mode 100644 site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html delete mode 100644 site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html delete mode 100644 site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html delete mode 100644 site/deeplearning/dl-op-linalg-mathexp-benchmark.html delete mode 100644 site/deeplearning/dl-op-linalg-mathfpow-benchmark.html delete mode 100644 site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html delete mode 100644 site/deeplearning/dl-op-linalg-matmul-benchmark.html delete mode 100644 site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html delete mode 100644 site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html delete mode 100644 site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html delete mode 100644 site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html delete mode 100644 site/deeplearning/dl-op-matmul-transpose-b-benchmark.html delete mode 100644 
site/deeplearning/dl-op-tosa-transpose-benchmark.html delete mode 100644 site/deeplearning/run_results_summary.html delete mode 100644 site/geminiprocessing/build.html delete mode 100644 site/geminiprocessing/cmake_configure.html delete mode 100644 site/vectorization/vectorization_result.html mode change 100755 => 100644 test_result/deeplearning/build_results_crosscompile_summary.log mode change 100755 => 100644 test_result/deeplearning/build_results_summary.log create mode 100644 test_result/deeplearning/dl-layer-ffn-benchmark.json mode change 100755 => 100644 test_result/deeplearning/dl-layer-ffn-benchmark.log create mode 100644 test_result/deeplearning/dl-layer-rmsnorm-benchmark.json mode change 100755 => 100644 test_result/deeplearning/dl-layer-rmsnorm-benchmark.log create mode 100644 test_result/deeplearning/dl-layer-selfattention-benchmark.json mode change 100755 => 100644 test_result/deeplearning/dl-layer-selfattention-benchmark.log create mode 100644 test_result/deeplearning/dl-model-lenet-benchmark.json mode change 100755 => 100644 test_result/deeplearning/dl-model-lenet-benchmark.log create mode 100644 test_result/deeplearning/dl-model-mobilenetv3-benchmark.json mode change 100755 => 100644 test_result/deeplearning/dl-model-mobilenetv3-benchmark.log create mode 100644 test_result/deeplearning/dl-model-resnet18-benchmark.json mode change 100755 => 100644 test_result/deeplearning/dl-model-resnet18-benchmark.log create mode 100644 test_result/deeplearning/dl-model-tinyllama-benchmark.json mode change 100755 => 100644 test_result/deeplearning/dl-model-tinyllama-benchmark.log create mode 100644 test_result/deeplearning/dl-model-whisper-benchmark.json mode change 100755 => 100644 test_result/deeplearning/dl-model-whisper-benchmark.log create mode 100644 test_result/deeplearning/dl-op-linalg-arithaddf-benchmark.json mode change 100755 => 100644 test_result/deeplearning/dl-op-linalg-arithaddf-benchmark.log create mode 100644 
test_result/deeplearning/dl-op-linalg-arithdivf-benchmark.json mode change 100755 => 100644 test_result/deeplearning/dl-op-linalg-arithdivf-benchmark.log create mode 100644 test_result/deeplearning/dl-op-linalg-arithmulf-benchmark.json mode change 100755 => 100644 test_result/deeplearning/dl-op-linalg-arithmulf-benchmark.log create mode 100644 test_result/deeplearning/dl-op-linalg-arithnegf-benchmark.json mode change 100755 => 100644 test_result/deeplearning/dl-op-linalg-arithnegf-benchmark.log create mode 100644 test_result/deeplearning/dl-op-linalg-arithsubf-benchmark.json mode change 100755 => 100644 test_result/deeplearning/dl-op-linalg-arithsubf-benchmark.log create mode 100644 test_result/deeplearning/dl-op-linalg-batch-matmul-benchmark.json mode change 100755 => 100644 test_result/deeplearning/dl-op-linalg-batch-matmul-benchmark.log create mode 100644 test_result/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.json mode change 100755 => 100644 test_result/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.log create mode 100644 test_result/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json mode change 100755 => 100644 test_result/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.log create mode 100644 test_result/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json mode change 100755 => 100644 test_result/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.log create mode 100644 test_result/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json mode change 100755 => 100644 test_result/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.log create mode 100644 test_result/deeplearning/dl-op-linalg-mathexp-benchmark.json mode change 100755 => 100644 test_result/deeplearning/dl-op-linalg-mathexp-benchmark.log create mode 100644 test_result/deeplearning/dl-op-linalg-mathfpow-benchmark.json mode change 100755 => 100644 test_result/deeplearning/dl-op-linalg-mathfpow-benchmark.log create mode 100644 
test_result/deeplearning/dl-op-linalg-mathrsqrt-benchmark.json mode change 100755 => 100644 test_result/deeplearning/dl-op-linalg-mathrsqrt-benchmark.log create mode 100644 test_result/deeplearning/dl-op-linalg-matmul-benchmark.json mode change 100755 => 100644 test_result/deeplearning/dl-op-linalg-matmul-benchmark.log create mode 100644 test_result/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.json mode change 100755 => 100644 test_result/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.log create mode 100644 test_result/deeplearning/dl-op-linalg-reduceaddf-benchmark.json mode change 100755 => 100644 test_result/deeplearning/dl-op-linalg-reduceaddf-benchmark.log create mode 100644 test_result/deeplearning/dl-op-linalg-reducemaxf-benchmark.json mode change 100755 => 100644 test_result/deeplearning/dl-op-linalg-reducemaxf-benchmark.log create mode 100644 test_result/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.json mode change 100755 => 100644 test_result/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.log create mode 100644 test_result/deeplearning/dl-op-matmul-transpose-b-benchmark.json mode change 100755 => 100644 test_result/deeplearning/dl-op-matmul-transpose-b-benchmark.log create mode 100644 test_result/deeplearning/dl-op-tosa-transpose-benchmark.json mode change 100755 => 100644 test_result/deeplearning/dl-op-tosa-transpose-benchmark.log mode change 100755 => 100644 test_result/deeplearning/run_results_summary.log delete mode 100755 test_result/geminiprocessing/build.log delete mode 100755 test_result/geminiprocessing/cmake_configure.log create mode 100644 test_result/imageprocessing/image-processing-result.log diff --git a/site/deeplearning/build_results_crosscompile_summary.html b/site/deeplearning/build_results_crosscompile_summary.html deleted file mode 100644 index 97716c87..00000000 --- a/site/deeplearning/build_results_crosscompile_summary.html +++ /dev/null @@ -1,31 +0,0 @@ -

        deeplearning/build_results_crosscompile_summary.log

        2025-05-26 19:43:20 UTC

        -
        [Failed]  Build of 'dl-model-tinyllama-benchmark'
        -[Failed]  Build of 'dl-model-mobilenetv3-benchmark'
        -[Success] Build of 'dl-model-lenet-benchmark'
        -[Failed]  Build of 'dl-model-bert-benchmark'
        -[Failed]  Build of 'dl-model-whisper-benchmark'
        -[Failed]  Build of 'dl-model-resnet18-benchmark'
        -[Success] Build of 'dl-layer-ffn-benchmark'
        -[Success] Build of 'dl-layer-selfattention-benchmark'
        -[Success] Build of 'dl-layer-rmsnorm-benchmark'
        -[Failed]  Build of 'dl-op-linalg-matmul-benchmark'
        -[Success] Build of 'dl-op-linalg-conv2d-nchw-fchw-benchmark'
        -[Success] Build of 'dl-op-linalg-conv2d-nhwc-hwcf-benchmark'
        -[Success] Build of 'dl-op-linalg-conv2d-nhwc-fhwc-benchmark'
        -[Success] Build of 'dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark'
        -[Success] Build of 'dl-op-linalg-pooling-nhwc-sum-benchmark'
        -[Failed]  Build of 'dl-op-linalg-batch-matmul-benchmark'
        -[Success] Build of 'dl-op-linalg-arithaddf-benchmark'
        -[Success] Build of 'dl-op-linalg-arithdivf-benchmark'
        -[Success] Build of 'dl-op-linalg-arithmulf-benchmark'
        -[Success] Build of 'dl-op-linalg-arithnegf-benchmark'
        -[Success] Build of 'dl-op-linalg-arithsubf-benchmark'
        -[Success] Build of 'dl-op-linalg-mathfpow-benchmark'
        -[Success] Build of 'dl-op-linalg-mathrsqrt-benchmark'
        -[Success] Build of 'dl-op-linalg-mathexp-benchmark'
        -[Success] Build of 'dl-op-linalg-reduceaddf-benchmark'
        -[Success] Build of 'dl-op-linalg-reducemaxf-benchmark'
        -[Success] Build of 'dl-op-linalg-softmax-exp-sum-div-benchmark'
        -[Failed]  Build of 'dl-op-tosa-transpose-benchmark'
        -[Failed]  Build of 'dl-op-matmul-transpose-b-benchmark'
        -
        \ No newline at end of file diff --git a/site/deeplearning/build_results_summary.html b/site/deeplearning/build_results_summary.html deleted file mode 100644 index f308ef5a..00000000 --- a/site/deeplearning/build_results_summary.html +++ /dev/null @@ -1,31 +0,0 @@ -

        deeplearning/build_results_summary.log

        2025-05-26 19:43:20 UTC

        -
        [Success] Build of 'dl-model-tinyllama-benchmark'
        -[Success] Build of 'dl-model-mobilenetv3-benchmark'
        -[Success] Build of 'dl-model-lenet-benchmark'
        -[Failed]  Build of 'dl-model-bert-benchmark'
        -[Success] Build of 'dl-model-whisper-benchmark'
        -[Success] Build of 'dl-model-resnet18-benchmark'
        -[Success] Build of 'dl-layer-ffn-benchmark'
        -[Success] Build of 'dl-layer-selfattention-benchmark'
        -[Success] Build of 'dl-layer-rmsnorm-benchmark'
        -[Success] Build of 'dl-op-linalg-matmul-benchmark'
        -[Success] Build of 'dl-op-linalg-conv2d-nchw-fchw-benchmark'
        -[Success] Build of 'dl-op-linalg-conv2d-nhwc-hwcf-benchmark'
        -[Success] Build of 'dl-op-linalg-conv2d-nhwc-fhwc-benchmark'
        -[Success] Build of 'dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark'
        -[Success] Build of 'dl-op-linalg-pooling-nhwc-sum-benchmark'
        -[Success] Build of 'dl-op-linalg-batch-matmul-benchmark'
        -[Success] Build of 'dl-op-linalg-arithaddf-benchmark'
        -[Success] Build of 'dl-op-linalg-arithdivf-benchmark'
        -[Success] Build of 'dl-op-linalg-arithmulf-benchmark'
        -[Success] Build of 'dl-op-linalg-arithnegf-benchmark'
        -[Success] Build of 'dl-op-linalg-arithsubf-benchmark'
        -[Success] Build of 'dl-op-linalg-mathfpow-benchmark'
        -[Success] Build of 'dl-op-linalg-mathrsqrt-benchmark'
        -[Success] Build of 'dl-op-linalg-mathexp-benchmark'
        -[Success] Build of 'dl-op-linalg-reduceaddf-benchmark'
        -[Success] Build of 'dl-op-linalg-reducemaxf-benchmark'
        -[Success] Build of 'dl-op-linalg-softmax-exp-sum-div-benchmark'
        -[Success] Build of 'dl-op-tosa-transpose-benchmark'
        -[Success] Build of 'dl-op-matmul-transpose-b-benchmark'
        -
        \ No newline at end of file diff --git a/site/deeplearning/dl-layer-ffn-benchmark.html b/site/deeplearning/dl-layer-ffn-benchmark.html deleted file mode 100644 index 6174c0ab..00000000 --- a/site/deeplearning/dl-layer-ffn-benchmark.html +++ /dev/null @@ -1,20 +0,0 @@ -

        deeplearning/dl-layer-ffn-benchmark.log

        2025-05-26 19:43:20 UTC

        -
        2025-05-25T16:33:30+00:00
        -Running ./dl-layer-ffn-benchmark
        -Run on (24 X 5100 MHz CPU s)
        -CPU Caches:
        -  L1 Data 48 KiB (x12)
        -  L1 Instruction 32 KiB (x12)
        -  L2 Unified 1280 KiB (x12)
        -  L3 Unified 30720 KiB (x1)
        -Load Average: 1.00, 1.14, 3.58
        -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        ---------------------------------------------------------------------------
        -Benchmark                                Time             CPU   Iterations
        ---------------------------------------------------------------------------
        -DL_LAYER_FFN/Scalar                  0.065 ms        0.065 ms        10714
        -DL_LAYER_FFN/Auto_Vectorization      0.027 ms        0.027 ms        25753
        ------------------------------------------------------------
        -Correctness Verification: PASS
        ------------------------------------------------------------
        -
        \ No newline at end of file diff --git a/site/deeplearning/dl-layer-rmsnorm-benchmark.html b/site/deeplearning/dl-layer-rmsnorm-benchmark.html deleted file mode 100644 index c795a127..00000000 --- a/site/deeplearning/dl-layer-rmsnorm-benchmark.html +++ /dev/null @@ -1,20 +0,0 @@ -

        deeplearning/dl-layer-rmsnorm-benchmark.log

        2025-05-26 19:43:20 UTC

        -
        2025-05-25T16:33:34+00:00
        -Running ./dl-layer-rmsnorm-benchmark
        -Run on (24 X 5100 MHz CPU s)
        -CPU Caches:
        -  L1 Data 48 KiB (x12)
        -  L1 Instruction 32 KiB (x12)
        -  L2 Unified 1280 KiB (x12)
        -  L3 Unified 30720 KiB (x1)
        -Load Average: 1.00, 1.13, 3.57
        -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        -------------------------------------------------------------------------------
        -Benchmark                                    Time             CPU   Iterations
        -------------------------------------------------------------------------------
        -DL_LAYER_RMSNORM/Scalar                  0.002 ms        0.002 ms       360260
        -DL_LAYER_RMSNORM/Auto_Vectorization      0.001 ms        0.001 ms       748474
        ------------------------------------------------------------
        -Correctness Verification: PASS
        ------------------------------------------------------------
        -
        \ No newline at end of file diff --git a/site/deeplearning/dl-layer-selfattention-benchmark.html b/site/deeplearning/dl-layer-selfattention-benchmark.html deleted file mode 100644 index 5f475c02..00000000 --- a/site/deeplearning/dl-layer-selfattention-benchmark.html +++ /dev/null @@ -1,20 +0,0 @@ -

        deeplearning/dl-layer-selfattention-benchmark.log

        2025-05-26 19:43:20 UTC

        -
        2025-05-25T16:33:32+00:00
        -Running ./dl-layer-selfattention-benchmark
        -Run on (24 X 5100 MHz CPU s)
        -CPU Caches:
        -  L1 Data 48 KiB (x12)
        -  L1 Instruction 32 KiB (x12)
        -  L2 Unified 1280 KiB (x12)
        -  L3 Unified 30720 KiB (x1)
        -Load Average: 1.00, 1.14, 3.58
        -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        ---------------------------------------------------------------------------------
        -Benchmark                                      Time             CPU   Iterations
        ---------------------------------------------------------------------------------
        -DL_LAYER_ATTENTION/Scalar                   4.68 ms         4.68 ms          150
        -DL_LAYER_ATTENTION/Auto_Vectorization       1.57 ms         1.57 ms          446
        ------------------------------------------------------------
        -Correctness Verification: PASS
        ------------------------------------------------------------
        -
        \ No newline at end of file diff --git a/site/deeplearning/dl-model-lenet-benchmark.html b/site/deeplearning/dl-model-lenet-benchmark.html deleted file mode 100644 index e2683c3b..00000000 --- a/site/deeplearning/dl-model-lenet-benchmark.html +++ /dev/null @@ -1,21 +0,0 @@ -

        deeplearning/dl-model-lenet-benchmark.log

        2025-05-26 19:43:20 UTC

        -
        2025-05-25T16:29:36+00:00
        -Running ./dl-model-lenet-benchmark
        -Run on (24 X 5100 MHz CPU s)
        -CPU Caches:
        -  L1 Data 48 KiB (x12)
        -  L1 Instruction 32 KiB (x12)
        -  L2 Unified 1280 KiB (x12)
        -  L3 Unified 30720 KiB (x1)
        -Load Average: 1.00, 1.32, 4.34
        -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        ------------------------------------------------------------------------------
        -Benchmark                                   Time             CPU   Iterations
        ------------------------------------------------------------------------------
        -DL_MODEL_LENET/Auto_Vectorization       0.152 ms        0.152 ms         4530
        -DL_MODEL_LENET/Buddy_Vectorization      0.136 ms        0.136 ms         5149
        ------------------------------------------------------------
        -Correctness Verification:
        -Transform case: PASS
        ------------------------------------------------------------
        -
        \ No newline at end of file diff --git a/site/deeplearning/dl-model-mobilenetv3-benchmark.html b/site/deeplearning/dl-model-mobilenetv3-benchmark.html deleted file mode 100644 index 40a7bdaa..00000000 --- a/site/deeplearning/dl-model-mobilenetv3-benchmark.html +++ /dev/null @@ -1,21 +0,0 @@ -

        deeplearning/dl-model-mobilenetv3-benchmark.log

        2025-05-26 19:43:20 UTC

        -
        2025-05-25T16:29:34+00:00
        -Running ./dl-model-mobilenetv3-benchmark
        -Run on (24 X 5100 MHz CPU s)
        -CPU Caches:
        -  L1 Data 48 KiB (x12)
        -  L1 Instruction 32 KiB (x12)
        -  L2 Unified 1280 KiB (x12)
        -  L3 Unified 30720 KiB (x1)
        -Load Average: 1.00, 1.32, 4.34
        -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        ------------------------------------------------------------------------------------
        -Benchmark                                         Time             CPU   Iterations
        ------------------------------------------------------------------------------------
        -BM_MobileNet_V3/BM_MobileNet_V3_scalar         35.1 ms         35.1 ms           20
        -BM_MobileNet_V3/BM_MobileNet_V3_conv_opt       32.0 ms         32.0 ms           22
        ------------------------------------------------------------
        -Correctness Verification:
        -Transform case: PASS
        ------------------------------------------------------------
        -
        \ No newline at end of file diff --git a/site/deeplearning/dl-model-resnet18-benchmark.html b/site/deeplearning/dl-model-resnet18-benchmark.html deleted file mode 100644 index 287feae8..00000000 --- a/site/deeplearning/dl-model-resnet18-benchmark.html +++ /dev/null @@ -1,20 +0,0 @@ -

        deeplearning/dl-model-resnet18-benchmark.log

        2025-05-26 19:43:20 UTC

        -
        2025-05-25T16:33:27+00:00
        -Running ./dl-model-resnet18-benchmark
        -Run on (24 X 5100 MHz CPU s)
        -CPU Caches:
        -  L1 Data 48 KiB (x12)
        -  L1 Instruction 32 KiB (x12)
        -  L2 Unified 1280 KiB (x12)
        -  L3 Unified 30720 KiB (x1)
        -Load Average: 1.00, 1.14, 3.59
        -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        ---------------------------------------------------------------------------------
        -Benchmark                                      Time             CPU   Iterations
        ---------------------------------------------------------------------------------
        -DL_MODEL_Resnet18/Auto_Vectorization         720 ms          720 ms            1
        -DL_MODEL_Resnet18/Buddy_Vectorization        719 ms          719 ms            1
        ------------------------------------------------------------
        -Correctness Verification: PASS
        ------------------------------------------------------------
        -
        \ No newline at end of file diff --git a/site/deeplearning/dl-model-tinyllama-benchmark.html b/site/deeplearning/dl-model-tinyllama-benchmark.html deleted file mode 100644 index 76bbaa71..00000000 --- a/site/deeplearning/dl-model-tinyllama-benchmark.html +++ /dev/null @@ -1,21 +0,0 @@ -

        deeplearning/dl-model-tinyllama-benchmark.log

        2025-05-26 19:43:20 UTC

        -
        2025-05-25T16:24:01+00:00
        -Running ./dl-model-tinyllama-benchmark
        -Run on (24 X 5100 MHz CPU s)
        -CPU Caches:
        -  L1 Data 48 KiB (x12)
        -  L1 Instruction 32 KiB (x12)
        -  L2 Unified 1280 KiB (x12)
        -  L3 Unified 30720 KiB (x1)
        -Load Average: 1.79, 2.00, 5.81
        -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        -----------------------------------------------------------------------------
        -Benchmark                                  Time             CPU   Iterations
        -----------------------------------------------------------------------------
        -DL_MODEL_TINYLLAMA/scalar             160502 ms       160495 ms            1
        -DL_MODEL_TINYLLAMA/matmul_opt           9595 ms         9595 ms            1
        -DL_MODEL_TINYLLAMA/matmul_opt_omp       7607 ms         6928 ms            1
        ----------- Verification ----------
        -matmul_opt PASS
        -matmul_opt_omp PASS
        -
        \ No newline at end of file diff --git a/site/deeplearning/dl-model-whisper-benchmark.html b/site/deeplearning/dl-model-whisper-benchmark.html deleted file mode 100644 index 5bdee31b..00000000 --- a/site/deeplearning/dl-model-whisper-benchmark.html +++ /dev/null @@ -1,21 +0,0 @@ -

        deeplearning/dl-model-whisper-benchmark.log

        2025-05-26 19:43:20 UTC

        -
        2025-05-25T16:29:38+00:00
        -Running ./dl-model-whisper-benchmark
        -Run on (24 X 5100 MHz CPU s)
        -CPU Caches:
        -  L1 Data 48 KiB (x12)
        -  L1 Instruction 32 KiB (x12)
        -  L2 Unified 1280 KiB (x12)
        -  L3 Unified 30720 KiB (x1)
        -Load Average: 1.00, 1.31, 4.32
        -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        --------------------------------------------------------------------------------
        -Benchmark                                     Time             CPU   Iterations
        --------------------------------------------------------------------------------
        -DL_MODEL_Whisper/Auto_Vectorization       77089 ms        77086 ms            1
        -DL_MODEL_Whisper/Buddy_Vectorization      35954 ms        35953 ms            1
        ------------------------------------------------------------
        -Correctness Verification for Output1: PASS
        -Correctness Verification for Output2: FAIL
        ------------------------------------------------------------
        -
        \ No newline at end of file diff --git a/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html b/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html deleted file mode 100644 index 3c85d562..00000000 --- a/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html +++ /dev/null @@ -1,21 +0,0 @@ -

        deeplearning/dl-op-linalg-arithaddf-benchmark.log

        2025-05-26 19:43:20 UTC

        -
        2025-05-25T16:34:03+00:00
        -Running ./dl-op-linalg-arithaddf-benchmark
        -Run on (24 X 5100 MHz CPU s)
        -CPU Caches:
        -  L1 Data 48 KiB (x12)
        -  L1 Instruction 32 KiB (x12)
        -  L2 Unified 1280 KiB (x12)
        -  L3 Unified 30720 KiB (x1)
        -Load Average: 1.00, 1.12, 3.48
        -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        ---------------------------------------------------------------------
        -Benchmark                          Time             CPU   Iterations
        ---------------------------------------------------------------------
        -BM_ADDF_SCALAR                 0.030 ms        0.030 ms        23576
        -BM_ADDF_AutoVectorization      0.004 ms        0.004 ms       174965
        ------------------------------------------------------------
        -Correctness Verification:
        -Transform case: PASS
        ------------------------------------------------------------
        -
        \ No newline at end of file diff --git a/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html b/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html deleted file mode 100644 index d178a168..00000000 --- a/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html +++ /dev/null @@ -1,21 +0,0 @@ -

        deeplearning/dl-op-linalg-arithdivf-benchmark.log

        2025-05-26 19:43:20 UTC

        -
        2025-05-25T16:34:05+00:00
        -Running ./dl-op-linalg-arithdivf-benchmark
        -Run on (24 X 5100 MHz CPU s)
        -CPU Caches:
        -  L1 Data 48 KiB (x12)
        -  L1 Instruction 32 KiB (x12)
        -  L2 Unified 1280 KiB (x12)
        -  L3 Unified 30720 KiB (x1)
        -Load Average: 1.00, 1.12, 3.48
        -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        ---------------------------------------------------------------------
        -Benchmark                          Time             CPU   Iterations
        ---------------------------------------------------------------------
        -BM_DIVF_SCALAR                 0.030 ms        0.030 ms        23149
        -BM_DIVF_AutoVectorization      0.009 ms        0.009 ms        73790
        ------------------------------------------------------------
        -Correctness Verification:
        -Transform case: PASS
        ------------------------------------------------------------
        -
        \ No newline at end of file diff --git a/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html b/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html deleted file mode 100644 index fc5d88bd..00000000 --- a/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html +++ /dev/null @@ -1,21 +0,0 @@ -

        deeplearning/dl-op-linalg-arithmulf-benchmark.log

        2025-05-26 19:43:20 UTC

        -
        2025-05-25T16:34:07+00:00
        -Running ./dl-op-linalg-arithmulf-benchmark
        -Run on (24 X 5100 MHz CPU s)
        -CPU Caches:
        -  L1 Data 48 KiB (x12)
        -  L1 Instruction 32 KiB (x12)
        -  L2 Unified 1280 KiB (x12)
        -  L3 Unified 30720 KiB (x1)
        -Load Average: 1.00, 1.12, 3.48
        -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        ---------------------------------------------------------------------
        -Benchmark                          Time             CPU   Iterations
        ---------------------------------------------------------------------
        -BM_MULF_SCALAR                 0.030 ms        0.030 ms        23959
        -BM_MULF_AutoVectorization      0.004 ms        0.004 ms       175122
        ------------------------------------------------------------
        -Correctness Verification:
        -Transform case: PASS
        ------------------------------------------------------------
        -
        \ No newline at end of file diff --git a/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html b/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html deleted file mode 100644 index a1b32f7e..00000000 --- a/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html +++ /dev/null @@ -1,21 +0,0 @@ -

        deeplearning/dl-op-linalg-arithnegf-benchmark.log

        2025-05-26 19:43:20 UTC

        -
        2025-05-25T16:34:09+00:00
        -Running ./dl-op-linalg-arithnegf-benchmark
        -Run on (24 X 5100 MHz CPU s)
        -CPU Caches:
        -  L1 Data 48 KiB (x12)
        -  L1 Instruction 32 KiB (x12)
        -  L2 Unified 1280 KiB (x12)
        -  L3 Unified 30720 KiB (x1)
        -Load Average: 1.00, 1.12, 3.47
        -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        ---------------------------------------------------------------------
        -Benchmark                          Time             CPU   Iterations
        ---------------------------------------------------------------------
        -BM_NEGF_SCALAR                 0.023 ms        0.023 ms        30704
        -BM_NEGF_AutoVectorization      0.003 ms        0.003 ms       212512
        ------------------------------------------------------------
        -Correctness Verification:
        -Transform case: PASS
        ------------------------------------------------------------
        -
        \ No newline at end of file diff --git a/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html b/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html deleted file mode 100644 index cc10208c..00000000 --- a/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html +++ /dev/null @@ -1,21 +0,0 @@ -

        deeplearning/dl-op-linalg-arithsubf-benchmark.log

        2025-05-26 19:43:20 UTC

        -
        2025-05-25T16:34:11+00:00
        -Running ./dl-op-linalg-arithsubf-benchmark
        -Run on (24 X 5100 MHz CPU s)
        -CPU Caches:
        -  L1 Data 48 KiB (x12)
        -  L1 Instruction 32 KiB (x12)
        -  L2 Unified 1280 KiB (x12)
        -  L3 Unified 30720 KiB (x1)
        -Load Average: 1.00, 1.12, 3.47
        -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        ---------------------------------------------------------------------
        -Benchmark                          Time             CPU   Iterations
        ---------------------------------------------------------------------
        -BM_SUBF_SCALAR                 0.030 ms        0.030 ms        23752
        -BM_SUBF_AutoVectorization      0.005 ms        0.005 ms       174941
        ------------------------------------------------------------
        -Correctness Verification:
        -Transform case: PASS
        ------------------------------------------------------------
        -
        \ No newline at end of file diff --git a/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html b/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html deleted file mode 100644 index 6766d126..00000000 --- a/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html +++ /dev/null @@ -1,27 +0,0 @@ -

        deeplearning/dl-op-linalg-batch-matmul-benchmark.log

        2025-05-26 19:43:20 UTC

        -
        2025-05-25T16:33:53+00:00
        -Running ./dl-op-linalg-batch-matmul-benchmark
        -Run on (24 X 5100 MHz CPU s)
        -CPU Caches:
        -  L1 Data 48 KiB (x12)
        -  L1 Instruction 32 KiB (x12)
        -  L2 Unified 1280 KiB (x12)
        -  L3 Unified 30720 KiB (x1)
        -Load Average: 1.00, 1.12, 3.51
        -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        ----------------------------------------------------------------------------------------------
        -Benchmark                                                   Time             CPU   Iterations
        ----------------------------------------------------------------------------------------------
        -DL_OPS_BATCH_MATMUL/Scalar/iterations:1                  3525 ms         3525 ms            1
        -DL_OPS_BATCH_MATMUL/AutoVectorization/iterations:1        974 ms          974 ms            1
        -DL_OPS_BATCH_MATMUL/Vectorization/iterations:1            190 ms          190 ms            1
        -DL_OPS_BATCH_MATMUL/Tile/iterations:1                     109 ms          109 ms            1
        -DL_OPS_BATCH_MATMUL/SCF/iterations:1                      117 ms          117 ms            1
        -DL_OPS_BATCH_MATMUL/BROADCAST/iterations:1                352 ms          352 ms            1
        -DL_OPS_BATCH_MATMUL/BROADCAST_OMP/iterations:1           80.7 ms         53.0 ms            1
        ----------- Verification ----------
        -Tile PASS
        -SCF PASS
        -BROADCAST PASS
        -BROADCAST_OMP PASS
        -
        \ No newline at end of file diff --git a/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html b/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html deleted file mode 100644 index fa3568cd..00000000 --- a/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html +++ /dev/null @@ -1,21 +0,0 @@ -

        deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.log

        2025-05-26 19:43:20 UTC

        -
        2025-05-25T16:33:46+00:00
        -Running ./dl-op-linalg-conv2d-nchw-fchw-benchmark
        -Run on (24 X 5100 MHz CPU s)
        -CPU Caches:
        -  L1 Data 48 KiB (x12)
        -  L1 Instruction 32 KiB (x12)
        -  L2 Unified 1280 KiB (x12)
        -  L3 Unified 30720 KiB (x1)
        -Load Average: 1.00, 1.13, 3.54
        -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        --------------------------------------------------------------------
        -Benchmark                         Time             CPU   Iterations
        --------------------------------------------------------------------
        -BM_Conv2DNchwFchw_SCALAR        282 ms          282 ms            2
        -BM_Conv2DNchwFchw_Im2col       8.35 ms         8.35 ms           86
        ------------------------------------------------------------
        -Correctness Verification:
        -Transform case: PASS
        ------------------------------------------------------------
        -
        \ No newline at end of file diff --git a/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html b/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html deleted file mode 100644 index c8467076..00000000 --- a/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html +++ /dev/null @@ -1,23 +0,0 @@ -

        deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.log

        2025-05-26 19:43:20 UTC

        -
        2025-05-25T16:33:50+00:00
        -Running ./dl-op-linalg-conv2d-nhwc-fhwc-benchmark
        -Run on (24 X 5100 MHz CPU s)
        -CPU Caches:
        -  L1 Data 48 KiB (x12)
        -  L1 Instruction 32 KiB (x12)
        -  L2 Unified 1280 KiB (x12)
        -  L3 Unified 30720 KiB (x1)
        -Load Average: 1.00, 1.13, 3.52
        -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        ----------------------------------------------------------------------------------------------------
        -Benchmark                                                         Time             CPU   Iterations
        ----------------------------------------------------------------------------------------------------
        -DL_OPS_CONV_2D_NHWC_FHWC/scalar/iterations:5                   72.3 ms         72.3 ms            5
        -DL_OPS_CONV_2D_NHWC_FHWC/auto_vectorization/iterations:5       9.34 ms         9.34 ms            5
        -DL_OPS_CONV_2D_NHWC_FHWC/vectorization/iterations:5            1.74 ms         1.74 ms            5
        -DL_OPS_CONV_2D_NHWC_FHWC/vec_tile/iterations:5                 1.73 ms         1.73 ms            5
        ----------- Verification ----------
        -auto_vectorization PASS
        -vectorization PASS
        -vec_tile PASS
        -
        \ No newline at end of file diff --git a/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html b/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html deleted file mode 100644 index 5f6f41f0..00000000 --- a/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html +++ /dev/null @@ -1,21 +0,0 @@ -

        deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.log

        2025-05-26 19:43:20 UTC

        -
        2025-05-25T16:33:48+00:00
        -Running ./dl-op-linalg-conv2d-nhwc-hwcf-benchmark
        -Run on (24 X 5100 MHz CPU s)
        -CPU Caches:
        -  L1 Data 48 KiB (x12)
        -  L1 Instruction 32 KiB (x12)
        -  L2 Unified 1280 KiB (x12)
        -  L3 Unified 30720 KiB (x1)
        -Load Average: 1.00, 1.13, 3.52
        -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        ----------------------------------------------------------------------------------
        -Benchmark                                       Time             CPU   Iterations
        ----------------------------------------------------------------------------------
        -BM_CONV_2D_NHWC_HWCF_SCALAR                  32.3 ms         32.3 ms           22
        -BM_CONV_2D_NHWC_HWCF_AutoVectorization       6.14 ms         6.14 ms          114
        ------------------------------------------------------------
        -Correctness Verification:
        -Transform case: PASS
        ------------------------------------------------------------
        -
        \ No newline at end of file diff --git a/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html b/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html deleted file mode 100644 index 78d50376..00000000 --- a/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html +++ /dev/null @@ -1,21 +0,0 @@ -

        deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.log

        2025-05-26 19:43:20 UTC

        -
        2025-05-25T16:33:50+00:00
        -Running ./dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark
        -Run on (24 X 5100 MHz CPU s)
        -CPU Caches:
        -  L1 Data 48 KiB (x12)
        -  L1 Instruction 32 KiB (x12)
        -  L2 Unified 1280 KiB (x12)
        -  L3 Unified 30720 KiB (x1)
        -Load Average: 1.00, 1.13, 3.52
        -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        -------------------------------------------------------------------------------------------------------------
        -Benchmark                                                                  Time             CPU   Iterations
        -------------------------------------------------------------------------------------------------------------
        -DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/scalar/iterations:5                   6.55 ms         6.54 ms            5
        -DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/auto_vectorization/iterations:5       1.68 ms         1.68 ms            5
        -DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/vectorization/iterations:5           0.124 ms        0.124 ms            5
        ----------- Verification ----------
        -auto_vectorization PASS
        -vectorization PASS
        -
        \ No newline at end of file diff --git a/site/deeplearning/dl-op-linalg-mathexp-benchmark.html b/site/deeplearning/dl-op-linalg-mathexp-benchmark.html deleted file mode 100644 index 5d016d97..00000000 --- a/site/deeplearning/dl-op-linalg-mathexp-benchmark.html +++ /dev/null @@ -1,21 +0,0 @@ -

        deeplearning/dl-op-linalg-mathexp-benchmark.log

        2025-05-26 19:43:20 UTC

        -
        2025-05-25T16:34:17+00:00
        -Running ./dl-op-linalg-mathexp-benchmark
        -Run on (24 X 5100 MHz CPU s)
        -CPU Caches:
        -  L1 Data 48 KiB (x12)
        -  L1 Instruction 32 KiB (x12)
        -  L2 Unified 1280 KiB (x12)
        -  L3 Unified 30720 KiB (x1)
        -Load Average: 1.00, 1.11, 3.46
        -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        --------------------------------------------------------------------
        -Benchmark                         Time             CPU   Iterations
        --------------------------------------------------------------------
        -BM_EXP_SCALAR                 0.046 ms        0.046 ms        15245
        -BM_EXP_AutoVectorization      0.031 ms        0.031 ms        22544
        ------------------------------------------------------------
        -Correctness Verification:
        -Transform case: PASS
        ------------------------------------------------------------
        -
        \ No newline at end of file diff --git a/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html b/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html deleted file mode 100644 index 1ca87cea..00000000 --- a/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html +++ /dev/null @@ -1,21 +0,0 @@ -

        deeplearning/dl-op-linalg-mathfpow-benchmark.log

        2025-05-26 19:43:20 UTC

        -
        2025-05-25T16:34:13+00:00
        -Running ./dl-op-linalg-mathfpow-benchmark
        -Run on (24 X 5100 MHz CPU s)
        -CPU Caches:
        -  L1 Data 48 KiB (x12)
        -  L1 Instruction 32 KiB (x12)
        -  L2 Unified 1280 KiB (x12)
        -  L3 Unified 30720 KiB (x1)
        -Load Average: 1.00, 1.11, 3.46
        -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        ---------------------------------------------------------------------
        -Benchmark                          Time             CPU   Iterations
        ---------------------------------------------------------------------
        -BM_FPOW_SCALAR                 0.084 ms        0.084 ms         8153
        -BM_FPOW_AutoVectorization      0.057 ms        0.057 ms        12317
        ------------------------------------------------------------
        -Correctness Verification:
        -Transform case: PASS
        ------------------------------------------------------------
        -
        \ No newline at end of file diff --git a/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html b/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html deleted file mode 100644 index fe054df3..00000000 --- a/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html +++ /dev/null @@ -1,21 +0,0 @@ -

        deeplearning/dl-op-linalg-mathrsqrt-benchmark.log

        2025-05-26 19:43:20 UTC

        -
        2025-05-25T16:34:15+00:00
        -Running ./dl-op-linalg-mathrsqrt-benchmark
        -Run on (24 X 5100 MHz CPU s)
        -CPU Caches:
        -  L1 Data 48 KiB (x12)
        -  L1 Instruction 32 KiB (x12)
        -  L2 Unified 1280 KiB (x12)
        -  L3 Unified 30720 KiB (x1)
        -Load Average: 1.00, 1.11, 3.46
        -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        ----------------------------------------------------------------------
        -Benchmark                           Time             CPU   Iterations
        ----------------------------------------------------------------------
        -BM_RSQRT_SCALAR                 0.073 ms        0.073 ms         9557
        -BM_RSQRT_AutoVectorization      0.004 ms        0.004 ms       161107
        ------------------------------------------------------------
        -Correctness Verification:
        -Transform case: PASS
        ------------------------------------------------------------
        -
        \ No newline at end of file diff --git a/site/deeplearning/dl-op-linalg-matmul-benchmark.html b/site/deeplearning/dl-op-linalg-matmul-benchmark.html deleted file mode 100644 index f99b9067..00000000 --- a/site/deeplearning/dl-op-linalg-matmul-benchmark.html +++ /dev/null @@ -1,24 +0,0 @@ -

        deeplearning/dl-op-linalg-matmul-benchmark.log

        2025-05-26 19:43:20 UTC

        -
        2025-05-25T16:33:36+00:00
        -Running ./dl-op-linalg-matmul-benchmark
        -Run on (24 X 5100 MHz CPU s)
        -CPU Caches:
        -  L1 Data 48 KiB (x12)
        -  L1 Instruction 32 KiB (x12)
        -  L2 Unified 1280 KiB (x12)
        -  L3 Unified 30720 KiB (x1)
        -Load Average: 1.00, 1.13, 3.57
        -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        --------------------------------------------------------------------------------
        -Benchmark                                     Time             CPU   Iterations
        --------------------------------------------------------------------------------
        -DL_OPS_MATMUL/scalar_O0/iterations:1       3394 ms         3394 ms            1
        -DL_OPS_MATMUL/scalar_O3/iterations:1       2944 ms         2944 ms            1
        -DL_OPS_MATMUL/tile/iterations:1             120 ms          120 ms            1
        -DL_OPS_MATMUL/vec/iterations:1              139 ms          139 ms            1
        -DL_OPS_MATMUL/vec_omp/iterations:1         67.8 ms         17.8 ms            1
        ----------- Verification ----------
        -tile PASS
        -vec PASS
        -vec_omp PASS
        -
        \ No newline at end of file diff --git a/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html b/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html deleted file mode 100644 index c7b688df..00000000 --- a/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html +++ /dev/null @@ -1,21 +0,0 @@ -

        deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.log

        2025-05-26 19:43:20 UTC

        -
        2025-05-25T16:33:51+00:00
        -Running ./dl-op-linalg-pooling-nhwc-sum-benchmark
        -Run on (24 X 5100 MHz CPU s)
        -CPU Caches:
        -  L1 Data 48 KiB (x12)
        -  L1 Instruction 32 KiB (x12)
        -  L2 Unified 1280 KiB (x12)
        -  L3 Unified 30720 KiB (x1)
        -Load Average: 1.00, 1.13, 3.52
        -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        ---------------------------------------------------------------------------------
        -Benchmark                                      Time             CPU   Iterations
        ---------------------------------------------------------------------------------
        -BM_POOLING_NHWC_SUM_SCALAR                 0.233 ms        0.233 ms         2997
        -BM_POOLING_NHWC_SUM_AutoVectorization      0.042 ms        0.042 ms        16895
        ------------------------------------------------------------
        -Correctness Verification:
        -Transform case: PASS
        ------------------------------------------------------------
        -
        \ No newline at end of file diff --git a/site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html b/site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html deleted file mode 100644 index 3730cdfc..00000000 --- a/site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html +++ /dev/null @@ -1,12 +0,0 @@ -

        deeplearning/dl-op-linalg-reduceaddf-benchmark.log

        2025-05-26 19:43:20 UTC

        -
        2025-05-25T16:34:19+00:00
        -Running ./dl-op-linalg-reduceaddf-benchmark
        -Run on (24 X 5100 MHz CPU s)
        -CPU Caches:
        -  L1 Data 48 KiB (x12)
        -  L1 Instruction 32 KiB (x12)
        -  L2 Unified 1280 KiB (x12)
        -  L3 Unified 30720 KiB (x1)
        -Load Average: 1.00, 1.11, 3.44
        -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        -
        \ No newline at end of file diff --git a/site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html b/site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html deleted file mode 100644 index e667e8a6..00000000 --- a/site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html +++ /dev/null @@ -1,12 +0,0 @@ -

        deeplearning/dl-op-linalg-reducemaxf-benchmark.log

        2025-05-26 19:43:20 UTC

        -
        2025-05-25T16:34:19+00:00
        -Running ./dl-op-linalg-reducemaxf-benchmark
        -Run on (24 X 5100 MHz CPU s)
        -CPU Caches:
        -  L1 Data 48 KiB (x12)
        -  L1 Instruction 32 KiB (x12)
        -  L2 Unified 1280 KiB (x12)
        -  L3 Unified 30720 KiB (x1)
        -Load Average: 1.00, 1.11, 3.44
        -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        -
        \ No newline at end of file diff --git a/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html b/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html deleted file mode 100644 index 2244ab45..00000000 --- a/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html +++ /dev/null @@ -1,21 +0,0 @@ -

        deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.log

        2025-05-26 19:43:20 UTC

        -
        2025-05-25T16:34:19+00:00
        -Running ./dl-op-linalg-softmax-exp-sum-div-benchmark
        -Run on (24 X 5100 MHz CPU s)
        -CPU Caches:
        -  L1 Data 48 KiB (x12)
        -  L1 Instruction 32 KiB (x12)
        -  L2 Unified 1280 KiB (x12)
        -  L3 Unified 30720 KiB (x1)
        -Load Average: 1.00, 1.11, 3.44
        -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        ---------------------------------------------------------------------------------
        -Benchmark                                      Time             CPU   Iterations
        ---------------------------------------------------------------------------------
        -BM_SOFTMAXEXPSUMDIV_SCALAR                 0.006 ms        0.006 ms       123343
        -BM_SOFTMAXEXPSUMDIV_AutoVectorization      0.004 ms        0.004 ms       181973
        ------------------------------------------------------------
        -Correctness Verification:
        -Transform case: PASS
        ------------------------------------------------------------
        -
        \ No newline at end of file diff --git a/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html b/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html deleted file mode 100644 index 9c5ce5a0..00000000 --- a/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html +++ /dev/null @@ -1,23 +0,0 @@ -

        deeplearning/dl-op-matmul-transpose-b-benchmark.log

        2025-05-26 19:43:20 UTC

        -
        2025-05-25T16:34:22+00:00
        -Running ./dl-op-matmul-transpose-b-benchmark
        -Run on (24 X 5100 MHz CPU s)
        -CPU Caches:
        -  L1 Data 48 KiB (x12)
        -  L1 Instruction 32 KiB (x12)
        -  L2 Unified 1280 KiB (x12)
        -  L3 Unified 30720 KiB (x1)
        -Load Average: 1.00, 1.11, 3.44
        -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        ------------------------------------------------------------------------------------------------
        -Benchmark                                                     Time             CPU   Iterations
        ------------------------------------------------------------------------------------------------
        -DL_OPS_MATMUL_TRANSPOSE_B/scalar_O0/iterations:5           1262 ms         1262 ms            5
        -DL_OPS_MATMUL_TRANSPOSE_B/scalar_O3/iterations:5            311 ms          311 ms            5
        -DL_OPS_MATMUL_TRANSPOSE_B/scalar_O3_omp/iterations:5       33.9 ms         22.0 ms            5
        -DL_OPS_MATMUL_TRANSPOSE_B/vec/iterations:5                 85.3 ms         85.3 ms            5
        ----------- Verification ----------
        -scalar_O3 PASS
        -scalar_O3_omp PASS
        -vec PASS
        -
        \ No newline at end of file diff --git a/site/deeplearning/dl-op-tosa-transpose-benchmark.html b/site/deeplearning/dl-op-tosa-transpose-benchmark.html deleted file mode 100644 index f8d27bda..00000000 --- a/site/deeplearning/dl-op-tosa-transpose-benchmark.html +++ /dev/null @@ -1,19 +0,0 @@ -

        deeplearning/dl-op-tosa-transpose-benchmark.log

        2025-05-26 19:43:20 UTC

        -
        2025-05-25T16:34:22+00:00
        -Running ./dl-op-tosa-transpose-benchmark
        -Run on (24 X 5100 MHz CPU s)
        -CPU Caches:
        -  L1 Data 48 KiB (x12)
        -  L1 Instruction 32 KiB (x12)
        -  L2 Unified 1280 KiB (x12)
        -  L3 Unified 30720 KiB (x1)
        -Load Average: 1.00, 1.11, 3.44
        -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        --------------------------------------------------------------------------------------
        -Benchmark                                           Time             CPU   Iterations
        --------------------------------------------------------------------------------------
        -DL_OPS_TRANSPOSE_2D/scalar_O0/iterations:5       25.6 ms         19.9 ms            5
        -DL_OPS_TRANSPOSE_2D/scalar_O3/iterations:5       19.1 ms         16.2 ms            5
        ----------- Verification ----------
        -scalar_O3 PASS
        -
        \ No newline at end of file diff --git a/site/deeplearning/run_results_summary.html b/site/deeplearning/run_results_summary.html deleted file mode 100644 index bb376261..00000000 --- a/site/deeplearning/run_results_summary.html +++ /dev/null @@ -1,31 +0,0 @@ -

        deeplearning/run_results_summary.log

        2025-05-26 19:43:20 UTC

        -
        [Success] Run of 'dl-model-tinyllama-benchmark'
        -[Success] Run of 'dl-model-mobilenetv3-benchmark'
        -[Success] Run of 'dl-model-lenet-benchmark'
        -[Missing] Executable not found for 'dl-model-bert-benchmark'
        -[Success] Run of 'dl-model-whisper-benchmark'
        -[Success] Run of 'dl-model-resnet18-benchmark'
        -[Success] Run of 'dl-layer-ffn-benchmark'
        -[Success] Run of 'dl-layer-selfattention-benchmark'
        -[Success] Run of 'dl-layer-rmsnorm-benchmark'
        -[Success] Run of 'dl-op-linalg-matmul-benchmark'
        -[Success] Run of 'dl-op-linalg-conv2d-nchw-fchw-benchmark'
        -[Success] Run of 'dl-op-linalg-conv2d-nhwc-hwcf-benchmark'
        -[Success] Run of 'dl-op-linalg-conv2d-nhwc-fhwc-benchmark'
        -[Success] Run of 'dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark'
        -[Success] Run of 'dl-op-linalg-pooling-nhwc-sum-benchmark'
        -[Success] Run of 'dl-op-linalg-batch-matmul-benchmark'
        -[Success] Run of 'dl-op-linalg-arithaddf-benchmark'
        -[Success] Run of 'dl-op-linalg-arithdivf-benchmark'
        -[Success] Run of 'dl-op-linalg-arithmulf-benchmark'
        -[Success] Run of 'dl-op-linalg-arithnegf-benchmark'
        -[Success] Run of 'dl-op-linalg-arithsubf-benchmark'
        -[Success] Run of 'dl-op-linalg-mathfpow-benchmark'
        -[Success] Run of 'dl-op-linalg-mathrsqrt-benchmark'
        -[Success] Run of 'dl-op-linalg-mathexp-benchmark'
        -[Failed]  Run of 'dl-op-linalg-reduceaddf-benchmark'
        -[Failed]  Run of 'dl-op-linalg-reducemaxf-benchmark'
        -[Success] Run of 'dl-op-linalg-softmax-exp-sum-div-benchmark'
        -[Success] Run of 'dl-op-tosa-transpose-benchmark'
        -[Success] Run of 'dl-op-matmul-transpose-b-benchmark'
        -
        \ No newline at end of file diff --git a/site/geminiprocessing/build.html b/site/geminiprocessing/build.html deleted file mode 100644 index 7ef60cd9..00000000 --- a/site/geminiprocessing/build.html +++ /dev/null @@ -1,657 +0,0 @@ -

        geminiprocessing/build.log

        2025-05-26 19:43:20 UTC

        -
        [1/21] Creating directories for 'project_googlebenchmark'
        -[2/21] Building C object benchmarks/Gemmini/Ops/MatMulOp/CMakeFiles/ExoMatMul.dir/ExoMatmul.c.o
        -FAILED: benchmarks/Gemmini/Ops/MatMulOp/CMakeFiles/ExoMatMul.dir/ExoMatmul.c.o 
        -riscv64-unknown-linux-gnu-gcc  -I/home/buddy-complier-workspace/buddy-mlir/build/cmake/../../frontend/Interfaces -I/home/buddy-complier-workspace/buddy-mlir/build/cmake/../../thirdparty/include -I/home/buddy-complier-workspace/buddy-benchmark/benchmarks -I/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include -I/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/.. -I/home/xychen/buddy-mlir/frontend/Interfaces -O3 -DNDEBUG -MD -MT benchmarks/Gemmini/Ops/MatMulOp/CMakeFiles/ExoMatMul.dir/ExoMatmul.c.o -MF benchmarks/Gemmini/Ops/MatMulOp/CMakeFiles/ExoMatMul.dir/ExoMatmul.c.o.d -o benchmarks/Gemmini/Ops/MatMulOp/CMakeFiles/ExoMatMul.dir/ExoMatmul.c.o -c /home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c: In function '_exo_matmul_4':
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:28:47: error: macro "gemmini_extended_config_ex" requires 7 arguments, but only 6 given
        -   28 |   gemmini_extended_config_ex(WS, 0, 0, 1, 0, 0);
        -      |                                               ^
        -In file included from /home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:23:
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:251: note: macro "gemmini_extended_config_ex" defined here
        -  251 | #define gemmini_extended_config_ex(dataflow, sys_act, sys_shift, relu6_shift, A_stride, A_transpose, B_transpose) \
        -      | 
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:28:3: error: 'gemmini_extended_config_ex' undeclared (first use in this function)
        -   28 |   gemmini_extended_config_ex(WS, 0, 0, 1, 0, 0);
        -      |   ^~~~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:28:3: note: each undeclared identifier is reported only once for each function it appears in
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:35:18: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast]
        -   35 |   int32_t *res = (int32_t*) ((uint32_t)gemm_acc_malloc (16 * 16 * 4 * 4 * sizeof(int32_t)));
        -      |                  ^
        -In file included from /home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:20,
        -                 from /home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:23:
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:66:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
        -   66 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16));
        -      |                                  ^
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
        -  152 |         : "r"(rs1), "r"(rs2));                                                       \
        -      |               ^~~
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
        -  232 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
        -      |   ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:66:9: note: in expansion of macro 'gemmini_extended_preload'
        -   66 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16));
        -      |         ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:66:119: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
        -   66 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16));
        -      |                                                                                                                       ^
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
        -  152 |         : "r"(rs1), "r"(rs2));                                                       \
        -      |                         ^~~
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
        -  232 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
        -      |   ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:66:9: note: in expansion of macro 'gemmini_extended_preload'
        -   66 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16));
        -      |         ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:67:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
        -   67 |         gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024))/16))), ~((uint32_t)0), (16), (16), 16, 16);
        -      |                                            ^
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
        -  152 |         : "r"(rs1), "r"(rs2));                                                       \
        -      |               ^~~
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
        -  219 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED)
        -      |   ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:67:9: note: in expansion of macro 'gemmini_extended_compute_preloaded'
        -   67 |         gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024))/16))), ~((uint32_t)0), (16), (16), 16, 16);
        -      |         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:68:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
        -   68 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16));
        -      |                                  ^
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
        -  152 |         : "r"(rs1), "r"(rs2));                                                       \
        -      |               ^~~
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
        -  232 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
        -      |   ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:68:9: note: in expansion of macro 'gemmini_extended_preload'
        -   68 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16));
        -      |         ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:68:125: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
        -   68 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16));
        -      |                                                                                                                             ^
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
        -  152 |         : "r"(rs1), "r"(rs2));                                                       \
        -      |                         ^~~
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
        -  232 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
        -      |   ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:68:9: note: in expansion of macro 'gemmini_extended_preload'
        -   68 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16));
        -      |         ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:69:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
        -   69 |         gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024))/16))), ~((uint32_t)0), (16), (16), 16, 16);
        -      |                                            ^
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
        -  152 |         : "r"(rs1), "r"(rs2));                                                       \
        -      |               ^~~
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
        -  219 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED)
        -      |   ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:69:9: note: in expansion of macro 'gemmini_extended_compute_preloaded'
        -   69 |         gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024))/16))), ~((uint32_t)0), (16), (16), 16, 16);
        -      |         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:70:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
        -   70 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
        -      |                                  ^
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
        -  152 |         : "r"(rs1), "r"(rs2));                                                       \
        -      |               ^~~
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
        -  232 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
        -      |   ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:70:9: note: in expansion of macro 'gemmini_extended_preload'
        -   70 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
        -      |         ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:70:133: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
        -   70 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
        -      |                                                                                                                                     ^
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
        -  152 |         : "r"(rs1), "r"(rs2));                                                       \
        -      |                         ^~~
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
        -  232 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
        -      |   ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:70:9: note: in expansion of macro 'gemmini_extended_preload'
        -   70 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
        -      |         ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:71:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
        -   71 |         gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024))/16))), ~((uint32_t)0), (16), (16), 16, 16);
        -      |                                            ^
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
        -  152 |         : "r"(rs1), "r"(rs2));                                                       \
        -      |               ^~~
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
        -  219 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED)
        -      |   ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:71:9: note: in expansion of macro 'gemmini_extended_compute_preloaded'
        -   71 |         gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024))/16))), ~((uint32_t)0), (16), (16), 16, 16);
        -      |         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:72:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
        -   72 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
        -      |                                  ^
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
        -  152 |         : "r"(rs1), "r"(rs2));                                                       \
        -      |               ^~~
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
        -  232 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
        -      |   ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:72:9: note: in expansion of macro 'gemmini_extended_preload'
        -   72 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
        -      |         ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:72:133: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
        -   72 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
        -      |                                                                                                                                     ^
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
        -  152 |         : "r"(rs1), "r"(rs2));                                                       \
        -      |                         ^~~
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
        -  232 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
        -      |   ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:72:9: note: in expansion of macro 'gemmini_extended_preload'
        -   72 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
        -      |         ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:73:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
        -   73 |         gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024))/16))), ~((uint32_t)0), (16), (16), 16, 16);
        -      |                                            ^
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
        -  152 |         : "r"(rs1), "r"(rs2));                                                       \
        -      |               ^~~
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
        -  219 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED)
        -      |   ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:73:9: note: in expansion of macro 'gemmini_extended_compute_preloaded'
        -   73 |         gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024))/16))), ~((uint32_t)0), (16), (16), 16, 16);
        -      |         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:74:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
        -   74 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16));
        -      |                                  ^
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
        -  152 |         : "r"(rs1), "r"(rs2));                                                       \
        -      |               ^~~
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
        -  232 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
        -      |   ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:74:9: note: in expansion of macro 'gemmini_extended_preload'
        -   74 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16));
        -      |         ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:74:126: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
        -   74 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16));
        -      |                                                                                                                              ^
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
        -  152 |         : "r"(rs1), "r"(rs2));                                                       \
        -      |                         ^~~
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
        -  232 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
        -      |   ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:74:9: note: in expansion of macro 'gemmini_extended_preload'
        -   74 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16));
        -      |         ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:75:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
        -   75 |         gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + 256)/16))), ~((uint32_t)0), (16), (16), 16, 16);
        -      |                                            ^
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
        -  152 |         : "r"(rs1), "r"(rs2));                                                       \
        -      |               ^~~
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
        -  219 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED)
        -      |   ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:75:9: note: in expansion of macro 'gemmini_extended_compute_preloaded'
        -   75 |         gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + 256)/16))), ~((uint32_t)0), (16), (16), 16, 16);
        -      |         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:76:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
        -   76 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16));
        -      |                                  ^
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
        -  152 |         : "r"(rs1), "r"(rs2));                                                       \
        -      |               ^~~
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
        -  232 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
        -      |   ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:76:9: note: in expansion of macro 'gemmini_extended_preload'
        -   76 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16));
        -      |         ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:76:132: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
        -   76 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16));
        -      |                                                                                                                                    ^
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
        -  152 |         : "r"(rs1), "r"(rs2));                                                       \
        -      |                         ^~~
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
        -  232 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
        -      |   ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:76:9: note: in expansion of macro 'gemmini_extended_preload'
        -   76 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16));
        -      |         ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:77:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
        -   77 |         gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + 256)/16))), ~((uint32_t)0), (16), (16), 16, 16);
        -      |                                            ^
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
        -  152 |         : "r"(rs1), "r"(rs2));                                                       \
        -      |               ^~~
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
        -  219 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED)
        -      |   ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:77:9: note: in expansion of macro 'gemmini_extended_compute_preloaded'
        -   77 |         gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + 256)/16))), ~((uint32_t)0), (16), (16), 16, 16);
        -      |         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:78:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
        -   78 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
        -      |                                  ^
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
        -  152 |         : "r"(rs1), "r"(rs2));                                                       \
        -      |               ^~~
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
        -  232 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
        -      |   ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:78:9: note: in expansion of macro 'gemmini_extended_preload'
        -   78 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
        -      |         ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:78:140: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
        -   78 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
        -      |                                                                                                                                            ^
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
        -  152 |         : "r"(rs1), "r"(rs2));                                                       \
        -      |                         ^~~
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
        -  232 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
        -      |   ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:78:9: note: in expansion of macro 'gemmini_extended_preload'
        -   78 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
        -      |         ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:79:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
        -   79 |         gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + 256)/16))), ~((uint32_t)0), (16), (16), 16, 16);
        -      |                                            ^
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
        -  152 |         : "r"(rs1), "r"(rs2));                                                       \
        -      |               ^~~
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
        -  219 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED)
        -      |   ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:79:9: note: in expansion of macro 'gemmini_extended_compute_preloaded'
        -   79 |         gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + 256)/16))), ~((uint32_t)0), (16), (16), 16, 16);
        -      |         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:80:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
        -   80 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
        -      |                                  ^
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
        -  152 |         : "r"(rs1), "r"(rs2));                                                       \
        -      |               ^~~
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
        -  232 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
        -      |   ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:80:9: note: in expansion of macro 'gemmini_extended_preload'
        -   80 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
        -      |         ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:80:140: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
        -   80 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
        -      |                                                                                                                                            ^
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
        -  152 |         : "r"(rs1), "r"(rs2));                                                       \
        -      |                         ^~~
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
        -  232 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
        -      |   ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:80:9: note: in expansion of macro 'gemmini_extended_preload'
        -   80 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
        -      |         ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:81:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
        -   81 |         gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + 256)/16))), ~((uint32_t)0), (16), (16), 16, 16);
        -      |                                            ^
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
        -  152 |         : "r"(rs1), "r"(rs2));                                                       \
        -      |               ^~~
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
        -  219 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED)
        -      |   ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:81:9: note: in expansion of macro 'gemmini_extended_compute_preloaded'
        -   81 |         gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + 256)/16))), ~((uint32_t)0), (16), (16), 16, 16);
        -      |         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:82:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
        -   82 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16));
        -      |                                  ^
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
        -  152 |         : "r"(rs1), "r"(rs2));                                                       \
        -      |               ^~~
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
        -  232 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
        -      |   ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:82:9: note: in expansion of macro 'gemmini_extended_preload'
        -   82 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16));
        -      |         ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:82:134: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
        -   82 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16));
        -      |                                                                                                                                      ^
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
        -  152 |         : "r"(rs1), "r"(rs2));                                                       \
        -      |                         ^~~
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
        -  232 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
        -      |   ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:82:9: note: in expansion of macro 'gemmini_extended_preload'
        -   82 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16));
        -      |         ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:83:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
        -   83 |         gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (2) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16);
        -      |                                            ^
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
        -  152 |         : "r"(rs1), "r"(rs2));                                                       \
        -      |               ^~~
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
        -  219 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED)
        -      |   ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:83:9: note: in expansion of macro 'gemmini_extended_compute_preloaded'
        -   83 |         gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (2) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16);
        -      |         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:84:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
        -   84 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16));
        -      |                                  ^
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
        -  152 |         : "r"(rs1), "r"(rs2));                                                       \
        -      |               ^~~
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
        -  232 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
        -      |   ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:84:9: note: in expansion of macro 'gemmini_extended_preload'
        -   84 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16));
        -      |         ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:84:140: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
        -   84 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16));
        -      |                                                                                                                                            ^
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
        -  152 |         : "r"(rs1), "r"(rs2));                                                       \
        -      |                         ^~~
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
        -  232 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
        -      |   ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:84:9: note: in expansion of macro 'gemmini_extended_preload'
        -   84 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16));
        -      |         ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:85:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
        -   85 |         gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (2) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16);
        -      |                                            ^
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
        -  152 |         : "r"(rs1), "r"(rs2));                                                       \
        -      |               ^~~
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
        -  219 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED)
        -      |   ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:85:9: note: in expansion of macro 'gemmini_extended_compute_preloaded'
        -   85 |         gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (2) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16);
        -      |         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:86:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
        -   86 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
        -      |                                  ^
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
        -  152 |         : "r"(rs1), "r"(rs2));                                                       \
        -      |               ^~~
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
        -  232 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
        -      |   ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:86:9: note: in expansion of macro 'gemmini_extended_preload'
        -   86 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
        -      |         ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:86:148: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
        -   86 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
        -      |                                                                                                                                                    ^
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
        -  152 |         : "r"(rs1), "r"(rs2));                                                       \
        -      |                         ^~~
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
        -  232 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
        -      |   ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:86:9: note: in expansion of macro 'gemmini_extended_preload'
        -   86 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
        -      |         ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:87:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
        -   87 |         gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (2) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16);
        -      |                                            ^
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
        -  152 |         : "r"(rs1), "r"(rs2));                                                       \
        -      |               ^~~
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
        -  219 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED)
        -      |   ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:87:9: note: in expansion of macro 'gemmini_extended_compute_preloaded'
        -   87 |         gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (2) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16);
        -      |         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:88:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
        -   88 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
        -      |                                  ^
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
        -  152 |         : "r"(rs1), "r"(rs2));                                                       \
        -      |               ^~~
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
        -  232 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
        -      |   ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:88:9: note: in expansion of macro 'gemmini_extended_preload'
        -   88 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
        -      |         ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:88:148: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
        -   88 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
        -      |                                                                                                                                                    ^
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
        -  152 |         : "r"(rs1), "r"(rs2));                                                       \
        -      |                         ^~~
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
        -  232 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
        -      |   ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:88:9: note: in expansion of macro 'gemmini_extended_preload'
        -   88 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
        -      |         ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:89:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
        -   89 |         gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (2) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16);
        -      |                                            ^
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
        -  152 |         : "r"(rs1), "r"(rs2));                                                       \
        -      |               ^~~
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
        -  219 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED)
        -      |   ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:89:9: note: in expansion of macro 'gemmini_extended_compute_preloaded'
        -   89 |         gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (2) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16);
        -      |         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:90:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
        -   90 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16));
        -      |                                  ^
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
        -  152 |         : "r"(rs1), "r"(rs2));                                                       \
        -      |               ^~~
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
        -  232 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
        -      |   ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:90:9: note: in expansion of macro 'gemmini_extended_preload'
        -   90 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16));
        -      |         ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:90:134: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
        -   90 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16));
        -      |                                                                                                                                      ^
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
        -  152 |         : "r"(rs1), "r"(rs2));                                                       \
        -      |                         ^~~
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
        -  232 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
        -      |   ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:90:9: note: in expansion of macro 'gemmini_extended_preload'
        -   90 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16));
        -      |         ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:91:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
        -   91 |         gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (3) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16);
        -      |                                            ^
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
        -  152 |         : "r"(rs1), "r"(rs2));                                                       \
        -      |               ^~~
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
        -  219 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED)
        -      |   ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:91:9: note: in expansion of macro 'gemmini_extended_compute_preloaded'
        -   91 |         gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (3) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16);
        -      |         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:92:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
        -   92 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16));
        -      |                                  ^
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
        -  152 |         : "r"(rs1), "r"(rs2));                                                       \
        -      |               ^~~
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
        -  232 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
        -      |   ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:92:9: note: in expansion of macro 'gemmini_extended_preload'
        -   92 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16));
        -      |         ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:92:140: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
        -   92 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16));
        -      |                                                                                                                                            ^
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
        -  152 |         : "r"(rs1), "r"(rs2));                                                       \
        -      |                         ^~~
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
        -  232 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
        -      |   ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:92:9: note: in expansion of macro 'gemmini_extended_preload'
        -   92 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16));
        -      |         ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:93:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
        -   93 |         gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (3) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16);
        -      |                                            ^
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
        -  152 |         : "r"(rs1), "r"(rs2));                                                       \
        -      |               ^~~
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
        -  219 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED)
        -      |   ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:93:9: note: in expansion of macro 'gemmini_extended_compute_preloaded'
        -   93 |         gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (3) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16);
        -      |         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:94:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
        -   94 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
        -      |                                  ^
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
        -  152 |         : "r"(rs1), "r"(rs2));                                                       \
        -      |               ^~~
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
        -  232 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
        -      |   ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:94:9: note: in expansion of macro 'gemmini_extended_preload'
        -   94 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
        -      |         ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:94:148: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
        -   94 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
        -      |                                                                                                                                                    ^
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
        -  152 |         : "r"(rs1), "r"(rs2));                                                       \
        -      |                         ^~~
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
        -  232 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
        -      |   ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:94:9: note: in expansion of macro 'gemmini_extended_preload'
        -   94 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
        -      |         ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:95:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
        -   95 |         gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (3) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16);
        -      |                                            ^
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
        -  152 |         : "r"(rs1), "r"(rs2));                                                       \
        -      |               ^~~
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
        -  219 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED)
        -      |   ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:95:9: note: in expansion of macro 'gemmini_extended_compute_preloaded'
        -   95 |         gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (3) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16);
        -      |         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:96:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
        -   96 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
        -      |                                  ^
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
        -  152 |         : "r"(rs1), "r"(rs2));                                                       \
        -      |               ^~~
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
        -  232 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
        -      |   ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:96:9: note: in expansion of macro 'gemmini_extended_preload'
        -   96 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
        -      |         ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:96:148: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
        -   96 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
        -      |                                                                                                                                                    ^
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
        -  152 |         : "r"(rs1), "r"(rs2));                                                       \
        -      |                         ^~~
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
        -  232 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD)
        -      |   ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:96:9: note: in expansion of macro 'gemmini_extended_preload'
        -   96 |         gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
        -      |         ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:97:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
        -   97 |         gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (3) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16);
        -      |                                            ^
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
        -  152 |         : "r"(rs1), "r"(rs2));                                                       \
        -      |               ^~~
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
        -  219 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED)
        -      |   ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:97:9: note: in expansion of macro 'gemmini_extended_compute_preloaded'
        -   97 |         gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (3) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16);
        -      |         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:98:89: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
        -   98 |         gemmini_extended_mvout( ((uint64_t) &C[(16 * i + 3136 * io) * (256) + 64 * j]), (uint32_t) &*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16)), (16), (16) );
        -      |                                                                                         ^
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
        -  152 |         : "r"(rs1), "r"(rs2));                                                       \
        -      |                         ^~~
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:212:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
        -  212 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, dram_addr, ((uint64_t)(rows) << (ADDR_LEN + 16)) | ((uint64_t)(cols) << ADDR_LEN) | (uint64_t)(spad_addr), k_MVOUT)
        -      |   ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:98:9: note: in expansion of macro 'gemmini_extended_mvout'
        -   98 |         gemmini_extended_mvout( ((uint64_t) &C[(16 * i + 3136 * io) * (256) + 64 * j]), (uint32_t) &*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16)), (16), (16) );
        -      |         ^~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:99:94: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
        -   99 |         gemmini_extended_mvout( ((uint64_t) &C[(16 * i + 3136 * io) * (256) + 16 + 64 * j]), (uint32_t) &*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16)), (16), (16) );
        -      |                                                                                              ^
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
        -  152 |         : "r"(rs1), "r"(rs2));                                                       \
        -      |                         ^~~
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:212:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
        -  212 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, dram_addr, ((uint64_t)(rows) << (ADDR_LEN + 16)) | ((uint64_t)(cols) << ADDR_LEN) | (uint64_t)(spad_addr), k_MVOUT)
        -      |   ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:99:9: note: in expansion of macro 'gemmini_extended_mvout'
        -   99 |         gemmini_extended_mvout( ((uint64_t) &C[(16 * i + 3136 * io) * (256) + 16 + 64 * j]), (uint32_t) &*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16)), (16), (16) );
        -      |         ^~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:100:94: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
        -  100 |         gemmini_extended_mvout( ((uint64_t) &C[(16 * i + 3136 * io) * (256) + 32 + 64 * j]), (uint32_t) &*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16)), (16), (16) );
        -      |                                                                                              ^
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
        -  152 |         : "r"(rs1), "r"(rs2));                                                       \
        -      |                         ^~~
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:212:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
        -  212 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, dram_addr, ((uint64_t)(rows) << (ADDR_LEN + 16)) | ((uint64_t)(cols) << ADDR_LEN) | (uint64_t)(spad_addr), k_MVOUT)
        -      |   ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:100:9: note: in expansion of macro 'gemmini_extended_mvout'
        -  100 |         gemmini_extended_mvout( ((uint64_t) &C[(16 * i + 3136 * io) * (256) + 32 + 64 * j]), (uint32_t) &*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16)), (16), (16) );
        -      |         ^~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:101:94: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
        -  101 |         gemmini_extended_mvout( ((uint64_t) &C[(16 * i + 3136 * io) * (256) + 48 + 64 * j]), (uint32_t) &*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16)), (16), (16) );
        -      |                                                                                              ^
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R'
        -  152 |         : "r"(rs1), "r"(rs2));                                                       \
        -      |                         ^~~
        -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:212:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2'
        -  212 |   ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, dram_addr, ((uint64_t)(rows) << (ADDR_LEN + 16)) | ((uint64_t)(cols) << ADDR_LEN) | (uint64_t)(spad_addr), k_MVOUT)
        -      |   ^~~~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:101:9: note: in expansion of macro 'gemmini_extended_mvout'
        -  101 |         gemmini_extended_mvout( ((uint64_t) &C[(16 * i + 3136 * io) * (256) + 48 + 64 * j]), (uint32_t) &*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16)), (16), (16) );
        -      |         ^~~~~~~~~~~~~~~~~~~~~~
        -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:105:17: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
        -  105 |   gemm_acc_free((uint32_t)(res));
        -      |                 ^
        -[3/21] Generating buddy_matmul.o
        -[4/21] Building CXX object benchmarks/Gemmini/ResNet-101/CMakeFiles/CRunnerUtils.dir/CRunnerUtils.cpp.o
        -[5/21] Performing download step (git clone) for 'project_googlebenchmark'
        -Cloning into 'project_googlebenchmark'...
        -HEAD is now at f91b6b4 bump version to 1.6 in preparation for release
        -[6/21] Generating resnet-101.o
        -ninja: build stopped: subcommand failed.
        -
        \ No newline at end of file diff --git a/site/geminiprocessing/cmake_configure.html b/site/geminiprocessing/cmake_configure.html deleted file mode 100644 index f3db3469..00000000 --- a/site/geminiprocessing/cmake_configure.html +++ /dev/null @@ -1,39 +0,0 @@ -

        geminiprocessing/cmake_configure.log

        2025-05-26 19:43:20 UTC

        -
        -- The CXX compiler identification is GNU 9.2.0
        --- The C compiler identification is GNU 9.2.0
        --- Detecting CXX compiler ABI info
        --- Detecting CXX compiler ABI info - done
        --- Check for working CXX compiler: /home/buddy-complier-workspace/chipyard/.conda-env/esp-tools/bin/riscv64-unknown-linux-gnu-g++ - skipped
        --- Detecting CXX compile features
        --- Detecting CXX compile features - done
        --- Detecting C compiler ABI info
        --- Detecting C compiler ABI info - done
        --- Check for working C compiler: /home/buddy-complier-workspace/chipyard/.conda-env/esp-tools/bin/riscv64-unknown-linux-gnu-gcc - skipped
        --- Detecting C compile features
        --- Detecting C compile features - done
        --- Configuring Target Architecture: avx512f
        --- Configuring Target Triple: x86_64-unknown-linux-gnu
        --- Configuring benchmarks: google
        --- Performing Test CMAKE_HAVE_LIBC_PTHREAD
        --- Performing Test CMAKE_HAVE_LIBC_PTHREAD - Failed
        --- Looking for pthread_create in pthreads
        --- Looking for pthread_create in pthreads - not found
        --- Looking for pthread_create in pthread
        --- Looking for pthread_create in pthread - found
        --- Found Threads: TRUE  
        --- Performing Test HAVE_SSE
        --- Performing Test HAVE_SSE - Failed
        --- 	SSE support - no
        --- Performing Test HAVE_AVX2
        --- Performing Test HAVE_AVX2 - Failed
        --- 	AVX2 support - no
        --- Performing Test HAVE_AVX512
        --- Performing Test HAVE_AVX512 - Failed
        --- 	AVX512 support - no
        --- Performing Test HAVE_NEON
        --- Performing Test HAVE_NEON - Failed
        --- 	Arm Neon support - no
        --- Configuring done
        --- Generating done
        --- Build files have been written to: /home/buddy-complier-workspace/buddy-benchmark/build
        -
        \ No newline at end of file diff --git a/site/index.html b/site/index.html index cf31ad92..88dbcf5b 100644 --- a/site/index.html +++ b/site/index.html @@ -9,39 +9,5 @@ summary{font-weight:600;cursor:pointer}

        Buddy-Benchmark results

        \ No newline at end of file diff --git a/site/vectorization/vectorization_matrix.html b/site/vectorization/vectorization_matrix.html index 6d59b485..222a42ff 100644 --- a/site/vectorization/vectorization_matrix.html +++ b/site/vectorization/vectorization_matrix.html @@ -9,8 +9,8 @@ summary{font-weight:600;cursor:pointer} -

        vectorization/vectorization_matrix.json

        2025-05-26 20:00:08 UTC

        +

        vectorization/vectorization_matrix.json

        2025-05-26 20:28:57 UTC

        vectorization_matrix.json

        - -
        NameTime (ns)CPU (ns)Iterations
        MLIR_MatMul/119.119.136916854
        MLIR_MatVec/120.820.833686586
        \ No newline at end of file +MLIR_MatMul/119.119.136693381 +MLIR_MatVec/120.920.933941913 \ No newline at end of file diff --git a/site/vectorization/vectorization_result.html b/site/vectorization/vectorization_result.html deleted file mode 100644 index 892d586b..00000000 --- a/site/vectorization/vectorization_result.html +++ /dev/null @@ -1,205 +0,0 @@ -

        vectorization/vectorization_result.log

        2025-05-26 19:43:20 UTC

        -
        Vectorization Benchmark - Mon May 26 19:43:10 UTC 2025
        -[Info] Starting vectorization-matrix-benchmark build...
        -[Info] Running CMake configuration...
        --- The CXX compiler identification is GNU 11.4.0
        --- The C compiler identification is GNU 11.4.0
        --- Detecting CXX compiler ABI info
        --- Detecting CXX compiler ABI info - done
        --- Check for working CXX compiler: /usr/bin/c++ - skipped
        --- Detecting CXX compile features
        --- Detecting CXX compile features - done
        --- Detecting C compiler ABI info
        --- Detecting C compiler ABI info - done
        --- Check for working C compiler: /usr/bin/cc - skipped
        --- Detecting C compile features
        --- Detecting C compile features - done
        --- Configuring Target Architecture: avx512f
        --- Configuring Target Triple: x86_64-unknown-linux-gnu
        --- Configuring benchmarks: google
        --- Looking for pthread.h
        --- Looking for pthread.h - found
        --- Performing Test CMAKE_HAVE_LIBC_PTHREAD
        --- Performing Test CMAKE_HAVE_LIBC_PTHREAD - Success
        --- Found Threads: TRUE  
        --- Performing Test HAVE_SSE
        --- Performing Test HAVE_SSE - Success
        --- 	SSE support - yes
        --- Performing Test HAVE_AVX2
        --- Performing Test HAVE_AVX2 - Success
        --- 	AVX2 support - yes
        --- Performing Test HAVE_AVX512
        --- Performing Test HAVE_AVX512 - Failed
        --- 	AVX512 support - no
        --- Performing Test HAVE_NEON
        --- Performing Test HAVE_NEON - Failed
        --- 	Arm Neon support - no
        --- Configuring done
        --- Generating done
        --- Build files have been written to: /home/buddy-complier-workspace/buddy-benchmark/build
        -[Info] Building vectorization-matrix-benchmark...
        -[1/17] Generating mlir-matmul.o
        -[2/17] Generating mlir-matvec.o
        -[3/17] Linking CXX static library benchmarks/Vectorization/libMLIRMatMul.a
        -[4/17] Linking CXX static library benchmarks/Vectorization/libMLIRMatVec.a
        -[5/17] Creating directories for 'project_googlebenchmark'
        -[6/17] Performing download step (git clone) for 'project_googlebenchmark'
        -Cloning into 'project_googlebenchmark'...
        -HEAD is now at f91b6b4 bump version to 1.6 in preparation for release
        -[7/17] No update step for 'project_googlebenchmark'
        -[8/17] No patch step for 'project_googlebenchmark'
        -[9/17] Performing configure step for 'project_googlebenchmark'
        --- The CXX compiler identification is GNU 11.4.0
        --- Detecting CXX compiler ABI info
        --- Detecting CXX compiler ABI info - done
        --- Check for working CXX compiler: /usr/bin/c++ - skipped
        --- Detecting CXX compile features
        --- Detecting CXX compile features - done
        --- Failed to find LLVM FileCheck
        --- Found Git: /usr/bin/git (found version "2.34.1") 
        --- git version: v1.6.0 normalized to 1.6.0
        --- Version: 1.6.0
        --- Performing Test HAVE_CXX_FLAG_STD_CXX11
        --- Performing Test HAVE_CXX_FLAG_STD_CXX11 - Success
        --- Performing Test HAVE_CXX_FLAG_WALL
        --- Performing Test HAVE_CXX_FLAG_WALL - Success
        --- Performing Test HAVE_CXX_FLAG_WEXTRA
        --- Performing Test HAVE_CXX_FLAG_WEXTRA - Success
        --- Performing Test HAVE_CXX_FLAG_WSHADOW
        --- Performing Test HAVE_CXX_FLAG_WSHADOW - Success
        --- Performing Test HAVE_CXX_FLAG_WERROR
        --- Performing Test HAVE_CXX_FLAG_WERROR - Success
        --- Performing Test HAVE_CXX_FLAG_WSUGGEST_OVERRIDE
        --- Performing Test HAVE_CXX_FLAG_WSUGGEST_OVERRIDE - Success
        --- Performing Test HAVE_CXX_FLAG_PEDANTIC
        --- Performing Test HAVE_CXX_FLAG_PEDANTIC - Success
        --- Performing Test HAVE_CXX_FLAG_PEDANTIC_ERRORS
        --- Performing Test HAVE_CXX_FLAG_PEDANTIC_ERRORS - Success
        --- Performing Test HAVE_CXX_FLAG_WSHORTEN_64_TO_32
        --- Performing Test HAVE_CXX_FLAG_WSHORTEN_64_TO_32 - Failed
        --- Performing Test HAVE_CXX_FLAG_FSTRICT_ALIASING
        --- Performing Test HAVE_CXX_FLAG_FSTRICT_ALIASING - Success
        --- Performing Test HAVE_CXX_FLAG_WNO_DEPRECATED_DECLARATIONS
        --- Performing Test HAVE_CXX_FLAG_WNO_DEPRECATED_DECLARATIONS - Success
        --- Performing Test HAVE_CXX_FLAG_WNO_DEPRECATED
        --- Performing Test HAVE_CXX_FLAG_WNO_DEPRECATED - Success
        --- Performing Test HAVE_CXX_FLAG_WSTRICT_ALIASING
        --- Performing Test HAVE_CXX_FLAG_WSTRICT_ALIASING - Success
        --- Performing Test HAVE_CXX_FLAG_WD654
        --- Performing Test HAVE_CXX_FLAG_WD654 - Failed
        --- Performing Test HAVE_CXX_FLAG_WTHREAD_SAFETY
        --- Performing Test HAVE_CXX_FLAG_WTHREAD_SAFETY - Failed
        --- Performing Test HAVE_CXX_FLAG_COVERAGE
        --- Performing Test HAVE_CXX_FLAG_COVERAGE - Success
        --- Performing Test HAVE_STD_REGEX
        -CMake Warning at cmake/CXXFeatureCheck.cmake:43 (message):
        -  If you see build failures due to cross compilation, try setting
        -  HAVE_STD_REGEX to 0
        -Call Stack (most recent call first):
        -  CMakeLists.txt:279 (cxx_feature_check)
        -
        -
        --- Performing Test HAVE_STD_REGEX -- success
        --- Performing Test HAVE_GNU_POSIX_REGEX
        --- Performing Test HAVE_GNU_POSIX_REGEX -- failed to compile
        --- Performing Test HAVE_POSIX_REGEX
        -CMake Warning at cmake/CXXFeatureCheck.cmake:43 (message):
        -  If you see build failures due to cross compilation, try setting
        -  HAVE_POSIX_REGEX to 0
        -Call Stack (most recent call first):
        -  CMakeLists.txt:281 (cxx_feature_check)
        -
        -
        --- Performing Test HAVE_POSIX_REGEX -- success
        --- Performing Test HAVE_STEADY_CLOCK
        -CMake Warning at cmake/CXXFeatureCheck.cmake:43 (message):
        -  If you see build failures due to cross compilation, try setting
        -  HAVE_STEADY_CLOCK to 0
        -Call Stack (most recent call first):
        -  CMakeLists.txt:290 (cxx_feature_check)
        -
        -
        --- Performing Test HAVE_STEADY_CLOCK -- success
        --- Looking for C++ include pthread.h
        --- Looking for C++ include pthread.h - found
        --- Performing Test CMAKE_HAVE_LIBC_PTHREAD
        --- Performing Test CMAKE_HAVE_LIBC_PTHREAD - Success
        --- Found Threads: TRUE  
        --- Configuring done
        --- Generating done
        --- Build files have been written to: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/src/project_googlebenchmark-build
        -[10/17] Performing build step for 'project_googlebenchmark'
        -[1/22] Building CXX object src/CMakeFiles/benchmark.dir/sleep.cc.o
        -[2/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark_name.cc.o
        -[3/22] Building CXX object src/CMakeFiles/benchmark_main.dir/benchmark_main.cc.o
        -[4/22] Building CXX object src/CMakeFiles/benchmark.dir/counter.cc.o
        -[5/22] Building CXX object src/CMakeFiles/benchmark.dir/colorprint.cc.o
        -[6/22] Building CXX object src/CMakeFiles/benchmark.dir/string_util.cc.o
        -[7/22] Building CXX object src/CMakeFiles/benchmark.dir/perf_counters.cc.o
        -[8/22] Building CXX object src/CMakeFiles/benchmark.dir/reporter.cc.o
        -[9/22] Building CXX object src/CMakeFiles/benchmark.dir/timers.cc.o
        -[10/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark_api_internal.cc.o
        -[11/22] Building CXX object src/CMakeFiles/benchmark.dir/commandlineflags.cc.o
        -[12/22] Building CXX object src/CMakeFiles/benchmark.dir/console_reporter.cc.o
        -[13/22] Building CXX object src/CMakeFiles/benchmark.dir/json_reporter.cc.o
        -[14/22] Building CXX object src/CMakeFiles/benchmark.dir/csv_reporter.cc.o
        -[15/22] Building CXX object src/CMakeFiles/benchmark.dir/sysinfo.cc.o
        -[16/22] Building CXX object src/CMakeFiles/benchmark.dir/complexity.cc.o
        -[17/22] Building CXX object src/CMakeFiles/benchmark.dir/statistics.cc.o
        -[18/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark_runner.cc.o
        -[19/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark.cc.o
        -[20/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark_register.cc.o
        -[21/22] Linking CXX static library src/libbenchmark.a
        -[22/22] Linking CXX static library src/libbenchmark_main.a
        -[11/17] Performing install step for 'project_googlebenchmark'
        -[0/1] Install the project...
        --- Install configuration: "Release"
        --- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/lib/libbenchmark.a
        --- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/lib/libbenchmark_main.a
        --- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/include/benchmark
        --- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/include/benchmark/benchmark.h
        --- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/lib/cmake/benchmark/benchmarkConfig.cmake
        --- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/lib/cmake/benchmark/benchmarkConfigVersion.cmake
        --- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/lib/pkgconfig/benchmark.pc
        --- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/lib/cmake/benchmark/benchmarkTargets.cmake
        --- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/lib/cmake/benchmark/benchmarkTargets-release.cmake
        --- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/share/doc/benchmark
        --- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/share/doc/benchmark/AssemblyTests.md
        --- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/share/doc/benchmark/_config.yml
        --- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/share/doc/benchmark/dependencies.md
        --- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/share/doc/benchmark/index.md
        --- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/share/doc/benchmark/perf_counters.md
        --- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/share/doc/benchmark/platform_specific_build_instructions.md
        --- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/share/doc/benchmark/random_interleaving.md
        --- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/share/doc/benchmark/releasing.md
        --- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/share/doc/benchmark/tools.md
        --- Installing: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/share/doc/benchmark/user_guide.md
        -[12/17] No test step for 'project_googlebenchmark'
        -[13/17] Completed 'project_googlebenchmark'
        -[14/17] Building CXX object benchmarks/Vectorization/CMakeFiles/vectorization-matrix-benchmark.dir/Main.cpp.o
        -[15/17] Building CXX object benchmarks/Vectorization/CMakeFiles/vectorization-matrix-benchmark.dir/MLIRMatMulBenchmark.cpp.o
        -[16/17] Building CXX object benchmarks/Vectorization/CMakeFiles/vectorization-matrix-benchmark.dir/MLIRMatVecBenchmark.cpp.o
        -[17/17] Linking CXX executable bin/vectorization-matrix-benchmark
        -[Info] Running vectorization-matrix-benchmark...
        -2025-05-26T19:43:17+00:00
        -Running ./vectorization-matrix-benchmark
        -Run on (24 X 5100 MHz CPU s)
        -CPU Caches:
        -  L1 Data 48 KiB (x12)
        -  L1 Instruction 32 KiB (x12)
        -  L2 Unified 1280 KiB (x12)
        -  L3 Unified 30720 KiB (x1)
        -Load Average: 23.33, 16.98, 8.31
        -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        ---------------------------------------------------------
        -Benchmark              Time             CPU   Iterations
        ---------------------------------------------------------
        -MLIR_MatMul/1       19.3 ns         19.3 ns     36112317
        -MLIR_MatVec/1       21.0 ns         21.0 ns     33468392
        ---------------------------------------------------------
        -MLIR_MatMul: MLIR MatMul Operation + Nested Loop
        -[ 18 18 18 18 18 18 18 18 18 18 ]
        ---------------------------------------------------------
        -MLIR_MatVec: MLIR MatVec Operation
        -[ 18 18 18 18 18 18 18 18 18 18 ]
        -
        \ No newline at end of file diff --git a/test/test_script_deeplearning.sh b/test/test_script_deeplearning.sh index 5f779962..eef45e08 100755 --- a/test/test_script_deeplearning.sh +++ b/test/test_script_deeplearning.sh @@ -82,6 +82,7 @@ echo "[Info] PYTHONPATH = ${PYTHONPATH}" # 3. Prepare Build Folder and Run CMake ################################################################################ cd "${BUDDY_MLIR_DIR}/../buddy-benchmark" || exit 1 +rm -rf build mkdir -p build cd build || exit 1 @@ -116,7 +117,6 @@ for target in "${BENCHMARK_TARGETS[@]}"; do fi done -################################################################################ ################################################################################ # 5. Run Each Benchmark & Redirect Output (Continue Even If One Fails) ################################################################################ diff --git a/test_result/deeplearning/build_results_crosscompile_summary.log b/test_result/deeplearning/build_results_crosscompile_summary.log old mode 100755 new mode 100644 diff --git a/test_result/deeplearning/build_results_summary.log b/test_result/deeplearning/build_results_summary.log old mode 100755 new mode 100644 diff --git a/test_result/deeplearning/dl-layer-ffn-benchmark.json b/test_result/deeplearning/dl-layer-ffn-benchmark.json new file mode 100644 index 00000000..4c94760e --- /dev/null +++ b/test_result/deeplearning/dl-layer-ffn-benchmark.json @@ -0,0 +1,68 @@ +{ + "context": { + "date": "2025-05-26T21:12:34+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./dl-layer-ffn-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + 
"num_sharing": 24 + } + ], + "load_avg": [1.00439,1.19629,1.95947], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "DL_LAYER_FFN/Scalar", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "DL_LAYER_FFN/Scalar", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 10758, + "real_time": 6.5206875738922074e-02, + "cpu_time": 6.5204998977505119e-02, + "time_unit": "ms" + }, + { + "name": "DL_LAYER_FFN/Auto_Vectorization", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "DL_LAYER_FFN/Auto_Vectorization", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 25878, + "real_time": 2.6888433440486709e-02, + "cpu_time": 2.6887743681891955e-02, + "time_unit": "ms" + } + ] +} diff --git a/test_result/deeplearning/dl-layer-ffn-benchmark.log b/test_result/deeplearning/dl-layer-ffn-benchmark.log old mode 100755 new mode 100644 index 186cd636..476f00a9 --- a/test_result/deeplearning/dl-layer-ffn-benchmark.log +++ b/test_result/deeplearning/dl-layer-ffn-benchmark.log @@ -1,4 +1,4 @@ -2025-05-25T16:33:30+00:00 +2025-05-26T21:12:34+00:00 Running ./dl-layer-ffn-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.14, 3.58 +Load Average: 1.00, 1.20, 1.96 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
-------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------- -DL_LAYER_FFN/Scalar 0.065 ms 0.065 ms 10714 -DL_LAYER_FFN/Auto_Vectorization 0.027 ms 0.027 ms 25753 +DL_LAYER_FFN/Scalar 0.065 ms 0.065 ms 10758 +DL_LAYER_FFN/Auto_Vectorization 0.027 ms 0.027 ms 25878 ----------------------------------------------------------- Correctness Verification: PASS ----------------------------------------------------------- diff --git a/test_result/deeplearning/dl-layer-rmsnorm-benchmark.json b/test_result/deeplearning/dl-layer-rmsnorm-benchmark.json new file mode 100644 index 00000000..ecdebb4c --- /dev/null +++ b/test_result/deeplearning/dl-layer-rmsnorm-benchmark.json @@ -0,0 +1,68 @@ +{ + "context": { + "date": "2025-05-26T21:12:38+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./dl-layer-rmsnorm-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [1.00391,1.19287,1.9541], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "DL_LAYER_RMSNORM/Scalar", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "DL_LAYER_RMSNORM/Scalar", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 358748, + "real_time": 1.9752554668907084e-03, + "cpu_time": 1.9749622827165587e-03, + "time_unit": "ms" + }, + { + "name": "DL_LAYER_RMSNORM/Auto_Vectorization", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "DL_LAYER_RMSNORM/Auto_Vectorization", + "run_type": "iteration", + 
"repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 753724, + "real_time": 9.0683698707158544e-04, + "cpu_time": 9.0681028997351815e-04, + "time_unit": "ms" + } + ] +} diff --git a/test_result/deeplearning/dl-layer-rmsnorm-benchmark.log b/test_result/deeplearning/dl-layer-rmsnorm-benchmark.log old mode 100755 new mode 100644 index 351d605f..fae3378b --- a/test_result/deeplearning/dl-layer-rmsnorm-benchmark.log +++ b/test_result/deeplearning/dl-layer-rmsnorm-benchmark.log @@ -1,4 +1,4 @@ -2025-05-25T16:33:34+00:00 +2025-05-26T21:12:38+00:00 Running ./dl-layer-rmsnorm-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.13, 3.57 +Load Average: 1.00, 1.19, 1.95 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. ------------------------------------------------------------------------------ Benchmark Time CPU Iterations ------------------------------------------------------------------------------ -DL_LAYER_RMSNORM/Scalar 0.002 ms 0.002 ms 360260 -DL_LAYER_RMSNORM/Auto_Vectorization 0.001 ms 0.001 ms 748474 +DL_LAYER_RMSNORM/Scalar 0.002 ms 0.002 ms 358748 +DL_LAYER_RMSNORM/Auto_Vectorization 0.001 ms 0.001 ms 753724 ----------------------------------------------------------- Correctness Verification: PASS ----------------------------------------------------------- diff --git a/test_result/deeplearning/dl-layer-selfattention-benchmark.json b/test_result/deeplearning/dl-layer-selfattention-benchmark.json new file mode 100644 index 00000000..1b3b5134 --- /dev/null +++ b/test_result/deeplearning/dl-layer-selfattention-benchmark.json @@ -0,0 +1,68 @@ +{ + "context": { + "date": "2025-05-26T21:12:36+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./dl-layer-selfattention-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": 
true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [1.00391,1.19287,1.9541], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "DL_LAYER_ATTENTION/Scalar", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "DL_LAYER_ATTENTION/Scalar", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 148, + "real_time": 4.7080184056146726e+00, + "cpu_time": 4.7073387770270267e+00, + "time_unit": "ms" + }, + { + "name": "DL_LAYER_ATTENTION/Auto_Vectorization", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "DL_LAYER_ATTENTION/Auto_Vectorization", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 447, + "real_time": 1.5711927248927571e+00, + "cpu_time": 1.5709791208053694e+00, + "time_unit": "ms" + } + ] +} diff --git a/test_result/deeplearning/dl-layer-selfattention-benchmark.log b/test_result/deeplearning/dl-layer-selfattention-benchmark.log old mode 100755 new mode 100644 index 14874ba9..7af6fdca --- a/test_result/deeplearning/dl-layer-selfattention-benchmark.log +++ b/test_result/deeplearning/dl-layer-selfattention-benchmark.log @@ -1,4 +1,4 @@ -2025-05-25T16:33:32+00:00 +2025-05-26T21:12:36+00:00 Running ./dl-layer-selfattention-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.14, 3.58 +Load Average: 1.00, 1.19, 1.95 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
-------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------- -DL_LAYER_ATTENTION/Scalar 4.68 ms 4.68 ms 150 -DL_LAYER_ATTENTION/Auto_Vectorization 1.57 ms 1.57 ms 446 +DL_LAYER_ATTENTION/Scalar 4.71 ms 4.71 ms 148 +DL_LAYER_ATTENTION/Auto_Vectorization 1.57 ms 1.57 ms 447 ----------------------------------------------------------- Correctness Verification: PASS ----------------------------------------------------------- diff --git a/test_result/deeplearning/dl-model-lenet-benchmark.json b/test_result/deeplearning/dl-model-lenet-benchmark.json new file mode 100644 index 00000000..3de6403d --- /dev/null +++ b/test_result/deeplearning/dl-model-lenet-benchmark.json @@ -0,0 +1,68 @@ +{ + "context": { + "date": "2025-05-26T21:08:36+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./dl-model-lenet-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [1.36816,1.44824,2.24854], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "DL_MODEL_LENET/Auto_Vectorization", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "DL_MODEL_LENET/Auto_Vectorization", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4396, + "real_time": 1.5520746374775649e-01, + "cpu_time": 1.5520622338489537e-01, + "time_unit": "ms" + }, + { + "name": "DL_MODEL_LENET/Buddy_Vectorization", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "DL_MODEL_LENET/Buddy_Vectorization", + 
"run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 5074, + "real_time": 1.3665612941188610e-01, + "cpu_time": 1.3663077552227040e-01, + "time_unit": "ms" + } + ] +} diff --git a/test_result/deeplearning/dl-model-lenet-benchmark.log b/test_result/deeplearning/dl-model-lenet-benchmark.log old mode 100755 new mode 100644 index f2c5402a..add3c123 --- a/test_result/deeplearning/dl-model-lenet-benchmark.log +++ b/test_result/deeplearning/dl-model-lenet-benchmark.log @@ -1,4 +1,4 @@ -2025-05-25T16:29:36+00:00 +2025-05-26T21:08:36+00:00 Running ./dl-model-lenet-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.32, 4.34 +Load Average: 1.37, 1.45, 2.25 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. ----------------------------------------------------------------------------- Benchmark Time CPU Iterations ----------------------------------------------------------------------------- -DL_MODEL_LENET/Auto_Vectorization 0.152 ms 0.152 ms 4530 -DL_MODEL_LENET/Buddy_Vectorization 0.136 ms 0.136 ms 5149 +DL_MODEL_LENET/Auto_Vectorization 0.155 ms 0.155 ms 4396 +DL_MODEL_LENET/Buddy_Vectorization 0.137 ms 0.137 ms 5074 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/test_result/deeplearning/dl-model-mobilenetv3-benchmark.json b/test_result/deeplearning/dl-model-mobilenetv3-benchmark.json new file mode 100644 index 00000000..24f46507 --- /dev/null +++ b/test_result/deeplearning/dl-model-mobilenetv3-benchmark.json @@ -0,0 +1,68 @@ +{ + "context": { + "date": "2025-05-26T21:08:34+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./dl-model-mobilenetv3-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + 
"type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [1.40039,1.45605,2.25537], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "BM_MobileNet_V3/BM_MobileNet_V3_scalar", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "BM_MobileNet_V3/BM_MobileNet_V3_scalar", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 20, + "real_time": 3.5308412462472916e+01, + "cpu_time": 3.5303849899999996e+01, + "time_unit": "ms" + }, + { + "name": "BM_MobileNet_V3/BM_MobileNet_V3_conv_opt", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "BM_MobileNet_V3/BM_MobileNet_V3_conv_opt", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 21, + "real_time": 3.2171089379560378e+01, + "cpu_time": 3.2166612142857147e+01, + "time_unit": "ms" + } + ] +} diff --git a/test_result/deeplearning/dl-model-mobilenetv3-benchmark.log b/test_result/deeplearning/dl-model-mobilenetv3-benchmark.log old mode 100755 new mode 100644 index 075a009a..ef144411 --- a/test_result/deeplearning/dl-model-mobilenetv3-benchmark.log +++ b/test_result/deeplearning/dl-model-mobilenetv3-benchmark.log @@ -1,4 +1,4 @@ -2025-05-25T16:29:34+00:00 +2025-05-26T21:08:34+00:00 Running ./dl-model-mobilenetv3-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.32, 4.34 +Load Average: 1.40, 1.46, 2.26 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
----------------------------------------------------------------------------------- Benchmark Time CPU Iterations ----------------------------------------------------------------------------------- -BM_MobileNet_V3/BM_MobileNet_V3_scalar 35.1 ms 35.1 ms 20 -BM_MobileNet_V3/BM_MobileNet_V3_conv_opt 32.0 ms 32.0 ms 22 +BM_MobileNet_V3/BM_MobileNet_V3_scalar 35.3 ms 35.3 ms 20 +BM_MobileNet_V3/BM_MobileNet_V3_conv_opt 32.2 ms 32.2 ms 21 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/test_result/deeplearning/dl-model-resnet18-benchmark.json b/test_result/deeplearning/dl-model-resnet18-benchmark.json new file mode 100644 index 00000000..2ed2dc2d --- /dev/null +++ b/test_result/deeplearning/dl-model-resnet18-benchmark.json @@ -0,0 +1,68 @@ +{ + "context": { + "date": "2025-05-26T21:12:31+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./dl-model-resnet18-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [1.00439,1.19629,1.95947], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "DL_MODEL_Resnet18/Auto_Vectorization", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "DL_MODEL_Resnet18/Auto_Vectorization", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 7.2581797651946545e+02, + "cpu_time": 7.1790319600000009e+02, + "time_unit": "ms" + }, + { + "name": "DL_MODEL_Resnet18/Buddy_Vectorization", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": 
"DL_MODEL_Resnet18/Buddy_Vectorization", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 7.2334924899041653e+02, + "cpu_time": 7.2326446299999998e+02, + "time_unit": "ms" + } + ] +} diff --git a/test_result/deeplearning/dl-model-resnet18-benchmark.log b/test_result/deeplearning/dl-model-resnet18-benchmark.log old mode 100755 new mode 100644 index 97b71868..f3779a9b --- a/test_result/deeplearning/dl-model-resnet18-benchmark.log +++ b/test_result/deeplearning/dl-model-resnet18-benchmark.log @@ -1,4 +1,4 @@ -2025-05-25T16:33:27+00:00 +2025-05-26T21:12:31+00:00 Running ./dl-model-resnet18-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.14, 3.59 +Load Average: 1.00, 1.20, 1.96 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
-------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------- -DL_MODEL_Resnet18/Auto_Vectorization 720 ms 720 ms 1 -DL_MODEL_Resnet18/Buddy_Vectorization 719 ms 719 ms 1 +DL_MODEL_Resnet18/Auto_Vectorization 726 ms 718 ms 1 +DL_MODEL_Resnet18/Buddy_Vectorization 723 ms 723 ms 1 ----------------------------------------------------------- Correctness Verification: PASS ----------------------------------------------------------- diff --git a/test_result/deeplearning/dl-model-tinyllama-benchmark.json b/test_result/deeplearning/dl-model-tinyllama-benchmark.json new file mode 100644 index 00000000..f04efe52 --- /dev/null +++ b/test_result/deeplearning/dl-model-tinyllama-benchmark.json @@ -0,0 +1,82 @@ +{ + "context": { + "date": "2025-05-26T21:03:18+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./dl-model-tinyllama-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [1.60303,2.09766,2.74219], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "DL_MODEL_TINYLLAMA/scalar", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "DL_MODEL_TINYLLAMA/scalar", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 1.4531263318471611e+05, + "cpu_time": 1.4530597402600001e+05, + "time_unit": "ms" + }, + { + "name": "DL_MODEL_TINYLLAMA/matmul_opt", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "DL_MODEL_TINYLLAMA/matmul_opt", + 
"run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 9.8432251755148172e+03, + "cpu_time": 9.8427707820000032e+03, + "time_unit": "ms" + }, + { + "name": "DL_MODEL_TINYLLAMA/matmul_opt_omp", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "DL_MODEL_TINYLLAMA/matmul_opt_omp", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 7.8003611154854298e+03, + "cpu_time": 7.1571572710000164e+03, + "time_unit": "ms" + } + ] +} diff --git a/test_result/deeplearning/dl-model-tinyllama-benchmark.log b/test_result/deeplearning/dl-model-tinyllama-benchmark.log old mode 100755 new mode 100644 index 9054af0a..026219f6 --- a/test_result/deeplearning/dl-model-tinyllama-benchmark.log +++ b/test_result/deeplearning/dl-model-tinyllama-benchmark.log @@ -1,4 +1,4 @@ -2025-05-25T16:24:01+00:00 +2025-05-26T21:03:18+00:00 Running ./dl-model-tinyllama-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,14 +6,14 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.79, 2.00, 5.81 +Load Average: 1.60, 2.10, 2.74 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
---------------------------------------------------------------------------- Benchmark Time CPU Iterations ---------------------------------------------------------------------------- -DL_MODEL_TINYLLAMA/scalar 160502 ms 160495 ms 1 -DL_MODEL_TINYLLAMA/matmul_opt 9595 ms 9595 ms 1 -DL_MODEL_TINYLLAMA/matmul_opt_omp 7607 ms 6928 ms 1 +DL_MODEL_TINYLLAMA/scalar 145313 ms 145306 ms 1 +DL_MODEL_TINYLLAMA/matmul_opt 9843 ms 9843 ms 1 +DL_MODEL_TINYLLAMA/matmul_opt_omp 7800 ms 7157 ms 1 ---------- Verification ---------- matmul_opt PASS matmul_opt_omp PASS diff --git a/test_result/deeplearning/dl-model-whisper-benchmark.json b/test_result/deeplearning/dl-model-whisper-benchmark.json new file mode 100644 index 00000000..c1f18f9c --- /dev/null +++ b/test_result/deeplearning/dl-model-whisper-benchmark.json @@ -0,0 +1,68 @@ +{ + "context": { + "date": "2025-05-26T21:08:38+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./dl-model-whisper-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [1.36816,1.44824,2.24854], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "DL_MODEL_Whisper/Auto_Vectorization", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "DL_MODEL_Whisper/Auto_Vectorization", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 8.0864415192976594e+04, + "cpu_time": 8.0855295398000002e+04, + "time_unit": "ms" + }, + { + "name": "DL_MODEL_Whisper/Buddy_Vectorization", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": 
"DL_MODEL_Whisper/Buddy_Vectorization", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 3.5875804258510470e+04, + "cpu_time": 3.5871486203000000e+04, + "time_unit": "ms" + } + ] +} diff --git a/test_result/deeplearning/dl-model-whisper-benchmark.log b/test_result/deeplearning/dl-model-whisper-benchmark.log old mode 100755 new mode 100644 index 74e917e7..70acfa9b --- a/test_result/deeplearning/dl-model-whisper-benchmark.log +++ b/test_result/deeplearning/dl-model-whisper-benchmark.log @@ -1,4 +1,4 @@ -2025-05-25T16:29:38+00:00 +2025-05-26T21:08:38+00:00 Running ./dl-model-whisper-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.31, 4.32 +Load Average: 1.37, 1.45, 2.25 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
------------------------------------------------------------------------------- Benchmark Time CPU Iterations ------------------------------------------------------------------------------- -DL_MODEL_Whisper/Auto_Vectorization 77089 ms 77086 ms 1 -DL_MODEL_Whisper/Buddy_Vectorization 35954 ms 35953 ms 1 +DL_MODEL_Whisper/Auto_Vectorization 80864 ms 80855 ms 1 +DL_MODEL_Whisper/Buddy_Vectorization 35876 ms 35871 ms 1 ----------------------------------------------------------- Correctness Verification for Output1: PASS Correctness Verification for Output2: FAIL diff --git a/test_result/deeplearning/dl-op-linalg-arithaddf-benchmark.json b/test_result/deeplearning/dl-op-linalg-arithaddf-benchmark.json new file mode 100644 index 00000000..1fb664ee --- /dev/null +++ b/test_result/deeplearning/dl-op-linalg-arithaddf-benchmark.json @@ -0,0 +1,68 @@ +{ + "context": { + "date": "2025-05-26T21:13:08+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./dl-op-linalg-arithaddf-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [1.00098,1.17285,1.92188], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "BM_ADDF_SCALAR", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "BM_ADDF_SCALAR", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 23951, + "real_time": 2.9350469685000002e-02, + "cpu_time": 2.9346575633585240e-02, + "time_unit": "ms" + }, + { + "name": "BM_ADDF_AutoVectorization", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "BM_ADDF_AutoVectorization", + "run_type": 
"iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 174606, + "real_time": 4.2519020574317591e-03, + "cpu_time": 4.2513099836202651e-03, + "time_unit": "ms" + } + ] +} diff --git a/test_result/deeplearning/dl-op-linalg-arithaddf-benchmark.log b/test_result/deeplearning/dl-op-linalg-arithaddf-benchmark.log old mode 100755 new mode 100644 index 3561bcf9..01107b96 --- a/test_result/deeplearning/dl-op-linalg-arithaddf-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-arithaddf-benchmark.log @@ -1,4 +1,4 @@ -2025-05-25T16:34:03+00:00 +2025-05-26T21:13:08+00:00 Running ./dl-op-linalg-arithaddf-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.12, 3.48 +Load Average: 1.00, 1.17, 1.92 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. -------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------- -BM_ADDF_SCALAR 0.030 ms 0.030 ms 23576 -BM_ADDF_AutoVectorization 0.004 ms 0.004 ms 174965 +BM_ADDF_SCALAR 0.029 ms 0.029 ms 23951 +BM_ADDF_AutoVectorization 0.004 ms 0.004 ms 174606 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/test_result/deeplearning/dl-op-linalg-arithdivf-benchmark.json b/test_result/deeplearning/dl-op-linalg-arithdivf-benchmark.json new file mode 100644 index 00000000..3d9d66cb --- /dev/null +++ b/test_result/deeplearning/dl-op-linalg-arithdivf-benchmark.json @@ -0,0 +1,68 @@ +{ + "context": { + "date": "2025-05-26T21:13:11+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./dl-op-linalg-arithdivf-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + 
"size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [1.00049,1.16992,1.9165], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "BM_DIVF_SCALAR", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "BM_DIVF_SCALAR", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 22508, + "real_time": 2.9681050269122190e-02, + "cpu_time": 2.9680807579527284e-02, + "time_unit": "ms" + }, + { + "name": "BM_DIVF_AutoVectorization", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "BM_DIVF_AutoVectorization", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 73818, + "real_time": 9.6117635958387036e-03, + "cpu_time": 9.6116319325909687e-03, + "time_unit": "ms" + } + ] +} diff --git a/test_result/deeplearning/dl-op-linalg-arithdivf-benchmark.log b/test_result/deeplearning/dl-op-linalg-arithdivf-benchmark.log old mode 100755 new mode 100644 index 94f4277b..06005493 --- a/test_result/deeplearning/dl-op-linalg-arithdivf-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-arithdivf-benchmark.log @@ -1,4 +1,4 @@ -2025-05-25T16:34:05+00:00 +2025-05-26T21:13:11+00:00 Running ./dl-op-linalg-arithdivf-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.12, 3.48 +Load Average: 1.00, 1.17, 1.92 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
-------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------- -BM_DIVF_SCALAR 0.030 ms 0.030 ms 23149 -BM_DIVF_AutoVectorization 0.009 ms 0.009 ms 73790 +BM_DIVF_SCALAR 0.030 ms 0.030 ms 22508 +BM_DIVF_AutoVectorization 0.010 ms 0.010 ms 73818 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/test_result/deeplearning/dl-op-linalg-arithmulf-benchmark.json b/test_result/deeplearning/dl-op-linalg-arithmulf-benchmark.json new file mode 100644 index 00000000..922f1953 --- /dev/null +++ b/test_result/deeplearning/dl-op-linalg-arithmulf-benchmark.json @@ -0,0 +1,68 @@ +{ + "context": { + "date": "2025-05-26T21:13:12+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./dl-op-linalg-arithmulf-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [1.00049,1.16992,1.9165], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "BM_MULF_SCALAR", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "BM_MULF_SCALAR", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 23392, + "real_time": 2.9420993729111028e-02, + "cpu_time": 2.9417276718536250e-02, + "time_unit": "ms" + }, + { + "name": "BM_MULF_AutoVectorization", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "BM_MULF_AutoVectorization", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 175155, + "real_time": 
3.9999952532304437e-03, + "cpu_time": 3.9999198538437381e-03, + "time_unit": "ms" + } + ] +} diff --git a/test_result/deeplearning/dl-op-linalg-arithmulf-benchmark.log b/test_result/deeplearning/dl-op-linalg-arithmulf-benchmark.log old mode 100755 new mode 100644 index 7eb04e7f..aa469118 --- a/test_result/deeplearning/dl-op-linalg-arithmulf-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-arithmulf-benchmark.log @@ -1,4 +1,4 @@ -2025-05-25T16:34:07+00:00 +2025-05-26T21:13:12+00:00 Running ./dl-op-linalg-arithmulf-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.12, 3.48 +Load Average: 1.00, 1.17, 1.92 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. -------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------- -BM_MULF_SCALAR 0.030 ms 0.030 ms 23959 -BM_MULF_AutoVectorization 0.004 ms 0.004 ms 175122 +BM_MULF_SCALAR 0.029 ms 0.029 ms 23392 +BM_MULF_AutoVectorization 0.004 ms 0.004 ms 175155 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/test_result/deeplearning/dl-op-linalg-arithnegf-benchmark.json b/test_result/deeplearning/dl-op-linalg-arithnegf-benchmark.json new file mode 100644 index 00000000..6ad1caf3 --- /dev/null +++ b/test_result/deeplearning/dl-op-linalg-arithnegf-benchmark.json @@ -0,0 +1,68 @@ +{ + "context": { + "date": "2025-05-26T21:13:15+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./dl-op-linalg-arithnegf-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 
2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [1.00049,1.16992,1.9165], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "BM_NEGF_SCALAR", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "BM_NEGF_SCALAR", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 30765, + "real_time": 2.2786648212342501e-02, + "cpu_time": 2.2786353388590929e-02, + "time_unit": "ms" + }, + { + "name": "BM_NEGF_AutoVectorization", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "BM_NEGF_AutoVectorization", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 290149, + "real_time": 2.4227396805301059e-03, + "cpu_time": 2.4227158287638420e-03, + "time_unit": "ms" + } + ] +} diff --git a/test_result/deeplearning/dl-op-linalg-arithnegf-benchmark.log b/test_result/deeplearning/dl-op-linalg-arithnegf-benchmark.log old mode 100755 new mode 100644 index fc8e6962..532f64c3 --- a/test_result/deeplearning/dl-op-linalg-arithnegf-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-arithnegf-benchmark.log @@ -1,4 +1,4 @@ -2025-05-25T16:34:09+00:00 +2025-05-26T21:13:15+00:00 Running ./dl-op-linalg-arithnegf-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.12, 3.47 +Load Average: 1.00, 1.17, 1.92 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
-------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------- -BM_NEGF_SCALAR 0.023 ms 0.023 ms 30704 -BM_NEGF_AutoVectorization 0.003 ms 0.003 ms 212512 +BM_NEGF_SCALAR 0.023 ms 0.023 ms 30765 +BM_NEGF_AutoVectorization 0.002 ms 0.002 ms 290149 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/test_result/deeplearning/dl-op-linalg-arithsubf-benchmark.json b/test_result/deeplearning/dl-op-linalg-arithsubf-benchmark.json new file mode 100644 index 00000000..4be3ebde --- /dev/null +++ b/test_result/deeplearning/dl-op-linalg-arithsubf-benchmark.json @@ -0,0 +1,68 @@ +{ + "context": { + "date": "2025-05-26T21:13:17+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./dl-op-linalg-arithsubf-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [1,1.16699,1.91113], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "BM_SUBF_SCALAR", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "BM_SUBF_SCALAR", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 23979, + "real_time": 2.9514157302017990e-02, + "cpu_time": 2.9510655823845863e-02, + "time_unit": "ms" + }, + { + "name": "BM_SUBF_AutoVectorization", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "BM_SUBF_AutoVectorization", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 175235, + "real_time": 
4.0285944484571151e-03, + "cpu_time": 4.0285024795274904e-03, + "time_unit": "ms" + } + ] +} diff --git a/test_result/deeplearning/dl-op-linalg-arithsubf-benchmark.log b/test_result/deeplearning/dl-op-linalg-arithsubf-benchmark.log old mode 100755 new mode 100644 index 6d9c797c..27264f4f --- a/test_result/deeplearning/dl-op-linalg-arithsubf-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-arithsubf-benchmark.log @@ -1,4 +1,4 @@ -2025-05-25T16:34:11+00:00 +2025-05-26T21:13:17+00:00 Running ./dl-op-linalg-arithsubf-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.12, 3.47 +Load Average: 1.00, 1.17, 1.91 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. -------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------- -BM_SUBF_SCALAR 0.030 ms 0.030 ms 23752 -BM_SUBF_AutoVectorization 0.005 ms 0.005 ms 174941 +BM_SUBF_SCALAR 0.030 ms 0.030 ms 23979 +BM_SUBF_AutoVectorization 0.004 ms 0.004 ms 175235 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/test_result/deeplearning/dl-op-linalg-batch-matmul-benchmark.json b/test_result/deeplearning/dl-op-linalg-batch-matmul-benchmark.json new file mode 100644 index 00000000..6fb82eda --- /dev/null +++ b/test_result/deeplearning/dl-op-linalg-batch-matmul-benchmark.json @@ -0,0 +1,138 @@ +{ + "context": { + "date": "2025-05-26T21:12:58+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./dl-op-linalg-batch-matmul-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + 
"num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [1.00195,1.1792,1.93262], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "DL_OPS_BATCH_MATMUL/Scalar/iterations:1", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "DL_OPS_BATCH_MATMUL/Scalar/iterations:1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 3.5513852797448635e+03, + "cpu_time": 3.5510254610000002e+03, + "time_unit": "ms" + }, + { + "name": "DL_OPS_BATCH_MATMUL/AutoVectorization/iterations:1", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "DL_OPS_BATCH_MATMUL/AutoVectorization/iterations:1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 1.0021720249205828e+03, + "cpu_time": 1.0020681910000002e+03, + "time_unit": "ms" + }, + { + "name": "DL_OPS_BATCH_MATMUL/Vectorization/iterations:1", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "DL_OPS_BATCH_MATMUL/Vectorization/iterations:1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 1.9173206947743893e+02, + "cpu_time": 1.9172282700000042e+02, + "time_unit": "ms" + }, + { + "name": "DL_OPS_BATCH_MATMUL/Tile/iterations:1", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "DL_OPS_BATCH_MATMUL/Tile/iterations:1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 1.0962291248142719e+02, + "cpu_time": 1.0962218999999962e+02, + "time_unit": "ms" + }, + { + "name": "DL_OPS_BATCH_MATMUL/SCF/iterations:1", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "DL_OPS_BATCH_MATMUL/SCF/iterations:1", + 
"run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 1.1728629097342491e+02, + "cpu_time": 1.1727965299999977e+02, + "time_unit": "ms" + }, + { + "name": "DL_OPS_BATCH_MATMUL/BROADCAST/iterations:1", + "family_index": 5, + "per_family_instance_index": 0, + "run_name": "DL_OPS_BATCH_MATMUL/BROADCAST/iterations:1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 3.5626054368913174e+02, + "cpu_time": 3.5615294499999982e+02, + "time_unit": "ms" + }, + { + "name": "DL_OPS_BATCH_MATMUL/BROADCAST_OMP/iterations:1", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "DL_OPS_BATCH_MATMUL/BROADCAST_OMP/iterations:1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 7.9095944762229919e+01, + "cpu_time": 3.1461689000000348e+01, + "time_unit": "ms" + } + ] +} diff --git a/test_result/deeplearning/dl-op-linalg-batch-matmul-benchmark.log b/test_result/deeplearning/dl-op-linalg-batch-matmul-benchmark.log old mode 100755 new mode 100644 index 2d4aa1cb..7dc25b0e --- a/test_result/deeplearning/dl-op-linalg-batch-matmul-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-batch-matmul-benchmark.log @@ -1,4 +1,4 @@ -2025-05-25T16:33:53+00:00 +2025-05-26T21:12:58+00:00 Running ./dl-op-linalg-batch-matmul-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,18 +6,18 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.12, 3.51 +Load Average: 1.00, 1.18, 1.93 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
--------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations --------------------------------------------------------------------------------------------- -DL_OPS_BATCH_MATMUL/Scalar/iterations:1 3525 ms 3525 ms 1 -DL_OPS_BATCH_MATMUL/AutoVectorization/iterations:1 974 ms 974 ms 1 -DL_OPS_BATCH_MATMUL/Vectorization/iterations:1 190 ms 190 ms 1 -DL_OPS_BATCH_MATMUL/Tile/iterations:1 109 ms 109 ms 1 +DL_OPS_BATCH_MATMUL/Scalar/iterations:1 3551 ms 3551 ms 1 +DL_OPS_BATCH_MATMUL/AutoVectorization/iterations:1 1002 ms 1002 ms 1 +DL_OPS_BATCH_MATMUL/Vectorization/iterations:1 192 ms 192 ms 1 +DL_OPS_BATCH_MATMUL/Tile/iterations:1 110 ms 110 ms 1 DL_OPS_BATCH_MATMUL/SCF/iterations:1 117 ms 117 ms 1 -DL_OPS_BATCH_MATMUL/BROADCAST/iterations:1 352 ms 352 ms 1 -DL_OPS_BATCH_MATMUL/BROADCAST_OMP/iterations:1 80.7 ms 53.0 ms 1 +DL_OPS_BATCH_MATMUL/BROADCAST/iterations:1 356 ms 356 ms 1 +DL_OPS_BATCH_MATMUL/BROADCAST_OMP/iterations:1 79.1 ms 31.5 ms 1 ---------- Verification ---------- Tile PASS SCF PASS diff --git a/test_result/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.json b/test_result/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.json new file mode 100644 index 00000000..6af577a1 --- /dev/null +++ b/test_result/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.json @@ -0,0 +1,68 @@ +{ + "context": { + "date": "2025-05-26T21:12:52+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./dl-op-linalg-conv2d-nchw-fchw-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [1.00244,1.18262,1.93799], + 
"library_build_type": "release" + }, + "benchmarks": [ + { + "name": "BM_Conv2DNchwFchw_SCALAR", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "BM_Conv2DNchwFchw_SCALAR", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2, + "real_time": 2.8201012406498194e+02, + "cpu_time": 2.8196062250000000e+02, + "time_unit": "ms" + }, + { + "name": "BM_Conv2DNchwFchw_Im2col", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "BM_Conv2DNchwFchw_Im2col", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 62, + "real_time": 1.1751845959694155e+01, + "cpu_time": 1.1751463612903226e+01, + "time_unit": "ms" + } + ] +} diff --git a/test_result/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.log b/test_result/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.log old mode 100755 new mode 100644 index d41a78ce..8fb49caf --- a/test_result/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.log @@ -1,4 +1,4 @@ -2025-05-25T16:33:46+00:00 +2025-05-26T21:12:52+00:00 Running ./dl-op-linalg-conv2d-nchw-fchw-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.13, 3.54 +Load Average: 1.00, 1.18, 1.94 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
------------------------------------------------------------------- Benchmark Time CPU Iterations ------------------------------------------------------------------- BM_Conv2DNchwFchw_SCALAR 282 ms 282 ms 2 -BM_Conv2DNchwFchw_Im2col 8.35 ms 8.35 ms 86 +BM_Conv2DNchwFchw_Im2col 11.8 ms 11.8 ms 62 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json b/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json new file mode 100644 index 00000000..3d653d03 --- /dev/null +++ b/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json @@ -0,0 +1,96 @@ +{ + "context": { + "date": "2025-05-26T21:12:56+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./dl-op-linalg-conv2d-nhwc-fhwc-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [1.00195,1.1792,1.93262], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "DL_OPS_CONV_2D_NHWC_FHWC/scalar/iterations:5", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "DL_OPS_CONV_2D_NHWC_FHWC/scalar/iterations:5", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 5, + "real_time": 7.3650452867150307e+01, + "cpu_time": 7.3648638199999994e+01, + "time_unit": "ms" + }, + { + "name": "DL_OPS_CONV_2D_NHWC_FHWC/auto_vectorization/iterations:5", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "DL_OPS_CONV_2D_NHWC_FHWC/auto_vectorization/iterations:5", + "run_type": "iteration", + 
"repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 5, + "real_time": 9.3432668596506119e+00, + "cpu_time": 9.3431779999999964e+00, + "time_unit": "ms" + }, + { + "name": "DL_OPS_CONV_2D_NHWC_FHWC/vectorization/iterations:5", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "DL_OPS_CONV_2D_NHWC_FHWC/vectorization/iterations:5", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 5, + "real_time": 1.7432916909456253e+00, + "cpu_time": 1.7432738000000003e+00, + "time_unit": "ms" + }, + { + "name": "DL_OPS_CONV_2D_NHWC_FHWC/vec_tile/iterations:5", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "DL_OPS_CONV_2D_NHWC_FHWC/vec_tile/iterations:5", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 5, + "real_time": 1.7292745411396027e+00, + "cpu_time": 1.7292597999999937e+00, + "time_unit": "ms" + } + ] +} diff --git a/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.log b/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.log old mode 100755 new mode 100644 index 9f8ee937..a539bbd2 --- a/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.log @@ -1,4 +1,4 @@ -2025-05-25T16:33:50+00:00 +2025-05-26T21:12:56+00:00 Running ./dl-op-linalg-conv2d-nhwc-fhwc-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,12 +6,12 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.13, 3.52 +Load Average: 1.00, 1.18, 1.93 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
--------------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations --------------------------------------------------------------------------------------------------- -DL_OPS_CONV_2D_NHWC_FHWC/scalar/iterations:5 72.3 ms 72.3 ms 5 +DL_OPS_CONV_2D_NHWC_FHWC/scalar/iterations:5 73.7 ms 73.6 ms 5 DL_OPS_CONV_2D_NHWC_FHWC/auto_vectorization/iterations:5 9.34 ms 9.34 ms 5 DL_OPS_CONV_2D_NHWC_FHWC/vectorization/iterations:5 1.74 ms 1.74 ms 5 DL_OPS_CONV_2D_NHWC_FHWC/vec_tile/iterations:5 1.73 ms 1.73 ms 5 diff --git a/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json b/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json new file mode 100644 index 00000000..fdaf6bed --- /dev/null +++ b/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json @@ -0,0 +1,68 @@ +{ + "context": { + "date": "2025-05-26T21:12:54+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./dl-op-linalg-conv2d-nhwc-hwcf-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [1.00244,1.18262,1.93799], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "BM_CONV_2D_NHWC_HWCF_SCALAR", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "BM_CONV_2D_NHWC_HWCF_SCALAR", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 22, + "real_time": 3.2554571686143227e+01, + "cpu_time": 3.2554057500000006e+01, + "time_unit": "ms" + }, + { + "name": "BM_CONV_2D_NHWC_HWCF_AutoVectorization", + "family_index": 1, + 
"per_family_instance_index": 0, + "run_name": "BM_CONV_2D_NHWC_HWCF_AutoVectorization", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 115, + "real_time": 6.0713487960722139e+00, + "cpu_time": 6.0712789217391308e+00, + "time_unit": "ms" + } + ] +} diff --git a/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.log b/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.log old mode 100755 new mode 100644 index 34a043ac..d9fe9718 --- a/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.log @@ -1,4 +1,4 @@ -2025-05-25T16:33:48+00:00 +2025-05-26T21:12:54+00:00 Running ./dl-op-linalg-conv2d-nhwc-hwcf-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.13, 3.52 +Load Average: 1.00, 1.18, 1.94 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
--------------------------------------------------------------------------------- Benchmark Time CPU Iterations --------------------------------------------------------------------------------- -BM_CONV_2D_NHWC_HWCF_SCALAR 32.3 ms 32.3 ms 22 -BM_CONV_2D_NHWC_HWCF_AutoVectorization 6.14 ms 6.14 ms 114 +BM_CONV_2D_NHWC_HWCF_SCALAR 32.6 ms 32.6 ms 22 +BM_CONV_2D_NHWC_HWCF_AutoVectorization 6.07 ms 6.07 ms 115 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/test_result/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json b/test_result/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json new file mode 100644 index 00000000..16a28530 --- /dev/null +++ b/test_result/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json @@ -0,0 +1,82 @@ +{ + "context": { + "date": "2025-05-26T21:12:56+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [1.00195,1.1792,1.93262], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/scalar/iterations:5", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/scalar/iterations:5", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 5, + "real_time": 4.8418600112199783e+00, + "cpu_time": 4.8414392000000008e+00, + "time_unit": "ms" + }, + { + "name": 
"DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/auto_vectorization/iterations:5", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/auto_vectorization/iterations:5", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 5, + "real_time": 1.6818478703498840e+00, + "cpu_time": 1.6818200000000005e+00, + "time_unit": "ms" + }, + { + "name": "DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/vectorization/iterations:5", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/vectorization/iterations:5", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 5, + "real_time": 1.2000650167465210e-01, + "cpu_time": 1.2000399999999996e-01, + "time_unit": "ms" + } + ] +} diff --git a/test_result/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.log b/test_result/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.log old mode 100755 new mode 100644 index 3573e854..517bdee8 --- a/test_result/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.log @@ -1,4 +1,4 @@ -2025-05-25T16:33:50+00:00 +2025-05-26T21:12:56+00:00 Running ./dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,14 +6,14 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.13, 3.52 +Load Average: 1.00, 1.18, 1.93 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
------------------------------------------------------------------------------------------------------------ Benchmark Time CPU Iterations ------------------------------------------------------------------------------------------------------------ -DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/scalar/iterations:5 6.55 ms 6.54 ms 5 +DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/scalar/iterations:5 4.84 ms 4.84 ms 5 DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/auto_vectorization/iterations:5 1.68 ms 1.68 ms 5 -DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/vectorization/iterations:5 0.124 ms 0.124 ms 5 +DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/vectorization/iterations:5 0.120 ms 0.120 ms 5 ---------- Verification ---------- auto_vectorization PASS vectorization PASS diff --git a/test_result/deeplearning/dl-op-linalg-mathexp-benchmark.json b/test_result/deeplearning/dl-op-linalg-mathexp-benchmark.json new file mode 100644 index 00000000..acb58c6e --- /dev/null +++ b/test_result/deeplearning/dl-op-linalg-mathexp-benchmark.json @@ -0,0 +1,68 @@ +{ + "context": { + "date": "2025-05-26T21:13:22+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./dl-op-linalg-mathexp-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [1.08008,1.18115,1.91162], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "BM_EXP_SCALAR", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "BM_EXP_SCALAR", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 15289, + "real_time": 4.5686233631569172e-02, + "cpu_time": 4.5684854862973377e-02, + "time_unit": "ms" + }, + { + "name": 
"BM_EXP_AutoVectorization", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "BM_EXP_AutoVectorization", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 22195, + "real_time": 3.1881006680031194e-02, + "cpu_time": 3.1880129173237209e-02, + "time_unit": "ms" + } + ] +} diff --git a/test_result/deeplearning/dl-op-linalg-mathexp-benchmark.log b/test_result/deeplearning/dl-op-linalg-mathexp-benchmark.log old mode 100755 new mode 100644 index ed7837dc..67453461 --- a/test_result/deeplearning/dl-op-linalg-mathexp-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-mathexp-benchmark.log @@ -1,4 +1,4 @@ -2025-05-25T16:34:17+00:00 +2025-05-26T21:13:22+00:00 Running ./dl-op-linalg-mathexp-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.11, 3.46 +Load Average: 1.08, 1.18, 1.91 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
------------------------------------------------------------------- Benchmark Time CPU Iterations ------------------------------------------------------------------- -BM_EXP_SCALAR 0.046 ms 0.046 ms 15245 -BM_EXP_AutoVectorization 0.031 ms 0.031 ms 22544 +BM_EXP_SCALAR 0.046 ms 0.046 ms 15289 +BM_EXP_AutoVectorization 0.032 ms 0.032 ms 22195 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/test_result/deeplearning/dl-op-linalg-mathfpow-benchmark.json b/test_result/deeplearning/dl-op-linalg-mathfpow-benchmark.json new file mode 100644 index 00000000..c843a4ca --- /dev/null +++ b/test_result/deeplearning/dl-op-linalg-mathfpow-benchmark.json @@ -0,0 +1,68 @@ +{ + "context": { + "date": "2025-05-26T21:13:19+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./dl-op-linalg-mathfpow-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [1,1.16699,1.91113], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "BM_FPOW_SCALAR", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "BM_FPOW_SCALAR", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 8096, + "real_time": 8.3391365306194232e-02, + "cpu_time": 8.3389921689723326e-02, + "time_unit": "ms" + }, + { + "name": "BM_FPOW_AutoVectorization", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "BM_FPOW_AutoVectorization", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 12303, + "real_time": 
5.7476819394846557e-02, + "cpu_time": 5.7475786474843533e-02, + "time_unit": "ms" + } + ] +} diff --git a/test_result/deeplearning/dl-op-linalg-mathfpow-benchmark.log b/test_result/deeplearning/dl-op-linalg-mathfpow-benchmark.log old mode 100755 new mode 100644 index 58cbd7ee..ca5c13f0 --- a/test_result/deeplearning/dl-op-linalg-mathfpow-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-mathfpow-benchmark.log @@ -1,4 +1,4 @@ -2025-05-25T16:34:13+00:00 +2025-05-26T21:13:19+00:00 Running ./dl-op-linalg-mathfpow-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.11, 3.46 +Load Average: 1.00, 1.17, 1.91 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. -------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------- -BM_FPOW_SCALAR 0.084 ms 0.084 ms 8153 -BM_FPOW_AutoVectorization 0.057 ms 0.057 ms 12317 +BM_FPOW_SCALAR 0.083 ms 0.083 ms 8096 +BM_FPOW_AutoVectorization 0.057 ms 0.057 ms 12303 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/test_result/deeplearning/dl-op-linalg-mathrsqrt-benchmark.json b/test_result/deeplearning/dl-op-linalg-mathrsqrt-benchmark.json new file mode 100644 index 00000000..a1577c5f --- /dev/null +++ b/test_result/deeplearning/dl-op-linalg-mathrsqrt-benchmark.json @@ -0,0 +1,68 @@ +{ + "context": { + "date": "2025-05-26T21:13:20+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./dl-op-linalg-mathrsqrt-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + 
{ + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [1.08008,1.18115,1.91162], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "BM_RSQRT_SCALAR", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "BM_RSQRT_SCALAR", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 9407, + "real_time": 7.2711996517919431e-02, + "cpu_time": 7.2711034548740297e-02, + "time_unit": "ms" + }, + { + "name": "BM_RSQRT_AutoVectorization", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "BM_RSQRT_AutoVectorization", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 161010, + "real_time": 4.3512647488525760e-03, + "cpu_time": 4.3511522389913680e-03, + "time_unit": "ms" + } + ] +} diff --git a/test_result/deeplearning/dl-op-linalg-mathrsqrt-benchmark.log b/test_result/deeplearning/dl-op-linalg-mathrsqrt-benchmark.log old mode 100755 new mode 100644 index 0e28e595..c1254311 --- a/test_result/deeplearning/dl-op-linalg-mathrsqrt-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-mathrsqrt-benchmark.log @@ -1,4 +1,4 @@ -2025-05-25T16:34:15+00:00 +2025-05-26T21:13:20+00:00 Running ./dl-op-linalg-mathrsqrt-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.11, 3.46 +Load Average: 1.08, 1.18, 1.91 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
--------------------------------------------------------------------- Benchmark Time CPU Iterations --------------------------------------------------------------------- -BM_RSQRT_SCALAR 0.073 ms 0.073 ms 9557 -BM_RSQRT_AutoVectorization 0.004 ms 0.004 ms 161107 +BM_RSQRT_SCALAR 0.073 ms 0.073 ms 9407 +BM_RSQRT_AutoVectorization 0.004 ms 0.004 ms 161010 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/test_result/deeplearning/dl-op-linalg-matmul-benchmark.json b/test_result/deeplearning/dl-op-linalg-matmul-benchmark.json new file mode 100644 index 00000000..c7d1939e --- /dev/null +++ b/test_result/deeplearning/dl-op-linalg-matmul-benchmark.json @@ -0,0 +1,110 @@ +{ + "context": { + "date": "2025-05-26T21:12:40+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./dl-op-linalg-matmul-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [1.00342,1.18945,1.94873], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "DL_OPS_MATMUL/scalar_O0/iterations:1", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "DL_OPS_MATMUL/scalar_O0/iterations:1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 4.2198417354375124e+03, + "cpu_time": 4.2197150860000011e+03, + "time_unit": "ms" + }, + { + "name": "DL_OPS_MATMUL/scalar_O3/iterations:1", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "DL_OPS_MATMUL/scalar_O3/iterations:1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 
0, + "threads": 1, + "iterations": 1, + "real_time": 3.3937856554985046e+03, + "cpu_time": 3.3936371459999996e+03, + "time_unit": "ms" + }, + { + "name": "DL_OPS_MATMUL/tile/iterations:1", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "DL_OPS_MATMUL/tile/iterations:1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 1.1714999191462994e+02, + "cpu_time": 1.1714971299999988e+02, + "time_unit": "ms" + }, + { + "name": "DL_OPS_MATMUL/vec/iterations:1", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "DL_OPS_MATMUL/vec/iterations:1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 5.9898965060710907e+01, + "cpu_time": 5.9899150999999762e+01, + "time_unit": "ms" + }, + { + "name": "DL_OPS_MATMUL/vec_omp/iterations:1", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "DL_OPS_MATMUL/vec_omp/iterations:1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 3.0501415953040123e+01, + "cpu_time": 9.7919029999999907e+00, + "time_unit": "ms" + } + ] +} diff --git a/test_result/deeplearning/dl-op-linalg-matmul-benchmark.log b/test_result/deeplearning/dl-op-linalg-matmul-benchmark.log old mode 100755 new mode 100644 index d020eeff..9d8f9029 --- a/test_result/deeplearning/dl-op-linalg-matmul-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-matmul-benchmark.log @@ -1,4 +1,4 @@ -2025-05-25T16:33:36+00:00 +2025-05-26T21:12:40+00:00 Running ./dl-op-linalg-matmul-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,16 +6,16 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.13, 3.57 +Load Average: 1.00, 1.19, 1.95 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
------------------------------------------------------------------------------- Benchmark Time CPU Iterations ------------------------------------------------------------------------------- -DL_OPS_MATMUL/scalar_O0/iterations:1 3394 ms 3394 ms 1 -DL_OPS_MATMUL/scalar_O3/iterations:1 2944 ms 2944 ms 1 -DL_OPS_MATMUL/tile/iterations:1 120 ms 120 ms 1 -DL_OPS_MATMUL/vec/iterations:1 139 ms 139 ms 1 -DL_OPS_MATMUL/vec_omp/iterations:1 67.8 ms 17.8 ms 1 +DL_OPS_MATMUL/scalar_O0/iterations:1 4220 ms 4220 ms 1 +DL_OPS_MATMUL/scalar_O3/iterations:1 3394 ms 3394 ms 1 +DL_OPS_MATMUL/tile/iterations:1 117 ms 117 ms 1 +DL_OPS_MATMUL/vec/iterations:1 59.9 ms 59.9 ms 1 +DL_OPS_MATMUL/vec_omp/iterations:1 30.5 ms 9.79 ms 1 ---------- Verification ---------- tile PASS vec PASS diff --git a/test_result/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.json b/test_result/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.json new file mode 100644 index 00000000..b4b37a1e --- /dev/null +++ b/test_result/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.json @@ -0,0 +1,68 @@ +{ + "context": { + "date": "2025-05-26T21:12:56+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./dl-op-linalg-pooling-nhwc-sum-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [1.00195,1.1792,1.93262], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "BM_POOLING_NHWC_SUM_SCALAR", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "BM_POOLING_NHWC_SUM_SCALAR", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2972, + 
"real_time": 2.3404261144943544e-01, + "cpu_time": 2.3401008546433380e-01, + "time_unit": "ms" + }, + { + "name": "BM_POOLING_NHWC_SUM_AutoVectorization", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "BM_POOLING_NHWC_SUM_AutoVectorization", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 16865, + "real_time": 4.1462073481970008e-02, + "cpu_time": 4.1460686095463981e-02, + "time_unit": "ms" + } + ] +} diff --git a/test_result/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.log b/test_result/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.log old mode 100755 new mode 100644 index c13c5712..fe01dc2e --- a/test_result/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.log @@ -1,4 +1,4 @@ -2025-05-25T16:33:51+00:00 +2025-05-26T21:12:56+00:00 Running ./dl-op-linalg-pooling-nhwc-sum-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.13, 3.52 +Load Average: 1.00, 1.18, 1.93 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
-------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------- -BM_POOLING_NHWC_SUM_SCALAR 0.233 ms 0.233 ms 2997 -BM_POOLING_NHWC_SUM_AutoVectorization 0.042 ms 0.042 ms 16895 +BM_POOLING_NHWC_SUM_SCALAR 0.234 ms 0.234 ms 2972 +BM_POOLING_NHWC_SUM_AutoVectorization 0.041 ms 0.041 ms 16865 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/test_result/deeplearning/dl-op-linalg-reduceaddf-benchmark.json b/test_result/deeplearning/dl-op-linalg-reduceaddf-benchmark.json new file mode 100644 index 00000000..b9374b08 --- /dev/null +++ b/test_result/deeplearning/dl-op-linalg-reduceaddf-benchmark.json @@ -0,0 +1,38 @@ +{ + "context": { + "date": "2025-05-26T21:13:25+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./dl-op-linalg-reduceaddf-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [1.08008,1.18115,1.91162], + "library_build_type": "release" + }, + "benchmarks": [ diff --git a/test_result/deeplearning/dl-op-linalg-reduceaddf-benchmark.log b/test_result/deeplearning/dl-op-linalg-reduceaddf-benchmark.log old mode 100755 new mode 100644 index 5e44eb1f..f25afa63 --- a/test_result/deeplearning/dl-op-linalg-reduceaddf-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-reduceaddf-benchmark.log @@ -1,4 +1,4 @@ -2025-05-25T16:34:19+00:00 +2025-05-26T21:13:25+00:00 Running ./dl-op-linalg-reduceaddf-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,5 +6,5 @@ CPU Caches: L1 
Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.11, 3.44 +Load Average: 1.08, 1.18, 1.91 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. diff --git a/test_result/deeplearning/dl-op-linalg-reducemaxf-benchmark.json b/test_result/deeplearning/dl-op-linalg-reducemaxf-benchmark.json new file mode 100644 index 00000000..1df2bbe8 --- /dev/null +++ b/test_result/deeplearning/dl-op-linalg-reducemaxf-benchmark.json @@ -0,0 +1,38 @@ +{ + "context": { + "date": "2025-05-26T21:13:25+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./dl-op-linalg-reducemaxf-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [1.08008,1.18115,1.91162], + "library_build_type": "release" + }, + "benchmarks": [ diff --git a/test_result/deeplearning/dl-op-linalg-reducemaxf-benchmark.log b/test_result/deeplearning/dl-op-linalg-reducemaxf-benchmark.log old mode 100755 new mode 100644 index e96bd0ad..42781e26 --- a/test_result/deeplearning/dl-op-linalg-reducemaxf-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-reducemaxf-benchmark.log @@ -1,4 +1,4 @@ -2025-05-25T16:34:19+00:00 +2025-05-26T21:13:25+00:00 Running ./dl-op-linalg-reducemaxf-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,5 +6,5 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.11, 3.44 +Load Average: 1.08, 1.18, 1.91 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
diff --git a/test_result/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.json b/test_result/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.json new file mode 100644 index 00000000..a05381b8 --- /dev/null +++ b/test_result/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.json @@ -0,0 +1,68 @@ +{ + "context": { + "date": "2025-05-26T21:13:25+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./dl-op-linalg-softmax-exp-sum-div-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [1.08008,1.18115,1.91162], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "BM_SOFTMAXEXPSUMDIV_SCALAR", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "BM_SOFTMAXEXPSUMDIV_SCALAR", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 125501, + "real_time": 5.5569801497239611e-03, + "cpu_time": 5.5562642688106074e-03, + "time_unit": "ms" + }, + { + "name": "BM_SOFTMAXEXPSUMDIV_AutoVectorization", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "BM_SOFTMAXEXPSUMDIV_AutoVectorization", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 181845, + "real_time": 3.8459822164716873e-03, + "cpu_time": 3.8455111880997556e-03, + "time_unit": "ms" + } + ] +} diff --git a/test_result/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.log b/test_result/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.log old mode 100755 new mode 100644 index c503949b..672556ed --- 
a/test_result/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.log @@ -1,4 +1,4 @@ -2025-05-25T16:34:19+00:00 +2025-05-26T21:13:25+00:00 Running ./dl-op-linalg-softmax-exp-sum-div-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.11, 3.44 +Load Average: 1.08, 1.18, 1.91 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. -------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------- -BM_SOFTMAXEXPSUMDIV_SCALAR 0.006 ms 0.006 ms 123343 -BM_SOFTMAXEXPSUMDIV_AutoVectorization 0.004 ms 0.004 ms 181973 +BM_SOFTMAXEXPSUMDIV_SCALAR 0.006 ms 0.006 ms 125501 +BM_SOFTMAXEXPSUMDIV_AutoVectorization 0.004 ms 0.004 ms 181845 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/test_result/deeplearning/dl-op-matmul-transpose-b-benchmark.json b/test_result/deeplearning/dl-op-matmul-transpose-b-benchmark.json new file mode 100644 index 00000000..79beaecd --- /dev/null +++ b/test_result/deeplearning/dl-op-matmul-transpose-b-benchmark.json @@ -0,0 +1,96 @@ +{ + "context": { + "date": "2025-05-26T21:13:28+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./dl-op-matmul-transpose-b-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + 
"load_avg": [1.07324,1.17773,1.90625], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "DL_OPS_MATMUL_TRANSPOSE_B/scalar_O0/iterations:5", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "DL_OPS_MATMUL_TRANSPOSE_B/scalar_O0/iterations:5", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 5, + "real_time": 1.2652646832168102e+03, + "cpu_time": 1.2636451276000000e+03, + "time_unit": "ms" + }, + { + "name": "DL_OPS_MATMUL_TRANSPOSE_B/scalar_O3/iterations:5", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "DL_OPS_MATMUL_TRANSPOSE_B/scalar_O3/iterations:5", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 5, + "real_time": 3.2678271271288395e+02, + "cpu_time": 3.2677019700000011e+02, + "time_unit": "ms" + }, + { + "name": "DL_OPS_MATMUL_TRANSPOSE_B/scalar_O3_omp/iterations:5", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "DL_OPS_MATMUL_TRANSPOSE_B/scalar_O3_omp/iterations:5", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 5, + "real_time": 3.0260816961526871e+01, + "cpu_time": 1.8804882999999961e+01, + "time_unit": "ms" + }, + { + "name": "DL_OPS_MATMUL_TRANSPOSE_B/vec/iterations:5", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "DL_OPS_MATMUL_TRANSPOSE_B/vec/iterations:5", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 5, + "real_time": 8.3582005649805069e+01, + "cpu_time": 8.3579965600000250e+01, + "time_unit": "ms" + } + ] +} diff --git a/test_result/deeplearning/dl-op-matmul-transpose-b-benchmark.log b/test_result/deeplearning/dl-op-matmul-transpose-b-benchmark.log old mode 100755 new mode 100644 index 05074bd2..0b18c23a --- a/test_result/deeplearning/dl-op-matmul-transpose-b-benchmark.log +++ 
b/test_result/deeplearning/dl-op-matmul-transpose-b-benchmark.log @@ -1,4 +1,4 @@ -2025-05-25T16:34:22+00:00 +2025-05-26T21:13:28+00:00 Running ./dl-op-matmul-transpose-b-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,15 +6,15 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.11, 3.44 +Load Average: 1.07, 1.18, 1.91 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. ----------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations ----------------------------------------------------------------------------------------------- -DL_OPS_MATMUL_TRANSPOSE_B/scalar_O0/iterations:5 1262 ms 1262 ms 5 -DL_OPS_MATMUL_TRANSPOSE_B/scalar_O3/iterations:5 311 ms 311 ms 5 -DL_OPS_MATMUL_TRANSPOSE_B/scalar_O3_omp/iterations:5 33.9 ms 22.0 ms 5 -DL_OPS_MATMUL_TRANSPOSE_B/vec/iterations:5 85.3 ms 85.3 ms 5 +DL_OPS_MATMUL_TRANSPOSE_B/scalar_O0/iterations:5 1265 ms 1264 ms 5 +DL_OPS_MATMUL_TRANSPOSE_B/scalar_O3/iterations:5 327 ms 327 ms 5 +DL_OPS_MATMUL_TRANSPOSE_B/scalar_O3_omp/iterations:5 30.3 ms 18.8 ms 5 +DL_OPS_MATMUL_TRANSPOSE_B/vec/iterations:5 83.6 ms 83.6 ms 5 ---------- Verification ---------- scalar_O3 PASS scalar_O3_omp PASS diff --git a/test_result/deeplearning/dl-op-tosa-transpose-benchmark.json b/test_result/deeplearning/dl-op-tosa-transpose-benchmark.json new file mode 100644 index 00000000..529b1a14 --- /dev/null +++ b/test_result/deeplearning/dl-op-tosa-transpose-benchmark.json @@ -0,0 +1,68 @@ +{ + "context": { + "date": "2025-05-26T21:13:27+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./dl-op-tosa-transpose-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + 
"num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [1.07324,1.17773,1.90625], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "DL_OPS_TRANSPOSE_2D/scalar_O0/iterations:5", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "DL_OPS_TRANSPOSE_2D/scalar_O0/iterations:5", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 5, + "real_time": 2.9716657474637032e+01, + "cpu_time": 2.0871131400000003e+01, + "time_unit": "ms" + }, + { + "name": "DL_OPS_TRANSPOSE_2D/scalar_O3/iterations:5", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "DL_OPS_TRANSPOSE_2D/scalar_O3/iterations:5", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 5, + "real_time": 2.1914588660001755e+01, + "cpu_time": 2.0427011800000006e+01, + "time_unit": "ms" + } + ] +} diff --git a/test_result/deeplearning/dl-op-tosa-transpose-benchmark.log b/test_result/deeplearning/dl-op-tosa-transpose-benchmark.log old mode 100755 new mode 100644 index 12c7ba28..78e840aa --- a/test_result/deeplearning/dl-op-tosa-transpose-benchmark.log +++ b/test_result/deeplearning/dl-op-tosa-transpose-benchmark.log @@ -1,4 +1,4 @@ -2025-05-25T16:34:22+00:00 +2025-05-26T21:13:27+00:00 Running ./dl-op-tosa-transpose-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,12 +6,12 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.11, 3.44 +Load Average: 1.07, 1.18, 1.91 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
------------------------------------------------------------------------------------- Benchmark Time CPU Iterations ------------------------------------------------------------------------------------- -DL_OPS_TRANSPOSE_2D/scalar_O0/iterations:5 25.6 ms 19.9 ms 5 -DL_OPS_TRANSPOSE_2D/scalar_O3/iterations:5 19.1 ms 16.2 ms 5 +DL_OPS_TRANSPOSE_2D/scalar_O0/iterations:5 29.7 ms 20.9 ms 5 +DL_OPS_TRANSPOSE_2D/scalar_O3/iterations:5 21.9 ms 20.4 ms 5 ---------- Verification ---------- scalar_O3 PASS diff --git a/test_result/deeplearning/run_results_summary.log b/test_result/deeplearning/run_results_summary.log old mode 100755 new mode 100644 index ce1a088d..47cf8e48 --- a/test_result/deeplearning/run_results_summary.log +++ b/test_result/deeplearning/run_results_summary.log @@ -1,29 +1,83 @@ [Success] Run of 'dl-model-tinyllama-benchmark' + ↳ stdout/stderr → dl-model-tinyllama-benchmark.log + ↳ gbench JSON → dl-model-tinyllama-benchmark.json [Success] Run of 'dl-model-mobilenetv3-benchmark' + ↳ stdout/stderr → dl-model-mobilenetv3-benchmark.log + ↳ gbench JSON → dl-model-mobilenetv3-benchmark.json [Success] Run of 'dl-model-lenet-benchmark' + ↳ stdout/stderr → dl-model-lenet-benchmark.log + ↳ gbench JSON → dl-model-lenet-benchmark.json [Missing] Executable not found for 'dl-model-bert-benchmark' [Success] Run of 'dl-model-whisper-benchmark' + ↳ stdout/stderr → dl-model-whisper-benchmark.log + ↳ gbench JSON → dl-model-whisper-benchmark.json [Success] Run of 'dl-model-resnet18-benchmark' + ↳ stdout/stderr → dl-model-resnet18-benchmark.log + ↳ gbench JSON → dl-model-resnet18-benchmark.json [Success] Run of 'dl-layer-ffn-benchmark' + ↳ stdout/stderr → dl-layer-ffn-benchmark.log + ↳ gbench JSON → dl-layer-ffn-benchmark.json [Success] Run of 'dl-layer-selfattention-benchmark' + ↳ stdout/stderr → dl-layer-selfattention-benchmark.log + ↳ gbench JSON → dl-layer-selfattention-benchmark.json [Success] Run of 'dl-layer-rmsnorm-benchmark' + ↳ stdout/stderr → 
dl-layer-rmsnorm-benchmark.log + ↳ gbench JSON → dl-layer-rmsnorm-benchmark.json [Success] Run of 'dl-op-linalg-matmul-benchmark' + ↳ stdout/stderr → dl-op-linalg-matmul-benchmark.log + ↳ gbench JSON → dl-op-linalg-matmul-benchmark.json [Success] Run of 'dl-op-linalg-conv2d-nchw-fchw-benchmark' + ↳ stdout/stderr → dl-op-linalg-conv2d-nchw-fchw-benchmark.log + ↳ gbench JSON → dl-op-linalg-conv2d-nchw-fchw-benchmark.json [Success] Run of 'dl-op-linalg-conv2d-nhwc-hwcf-benchmark' + ↳ stdout/stderr → dl-op-linalg-conv2d-nhwc-hwcf-benchmark.log + ↳ gbench JSON → dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json [Success] Run of 'dl-op-linalg-conv2d-nhwc-fhwc-benchmark' + ↳ stdout/stderr → dl-op-linalg-conv2d-nhwc-fhwc-benchmark.log + ↳ gbench JSON → dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json [Success] Run of 'dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark' + ↳ stdout/stderr → dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.log + ↳ gbench JSON → dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json [Success] Run of 'dl-op-linalg-pooling-nhwc-sum-benchmark' + ↳ stdout/stderr → dl-op-linalg-pooling-nhwc-sum-benchmark.log + ↳ gbench JSON → dl-op-linalg-pooling-nhwc-sum-benchmark.json [Success] Run of 'dl-op-linalg-batch-matmul-benchmark' + ↳ stdout/stderr → dl-op-linalg-batch-matmul-benchmark.log + ↳ gbench JSON → dl-op-linalg-batch-matmul-benchmark.json [Success] Run of 'dl-op-linalg-arithaddf-benchmark' + ↳ stdout/stderr → dl-op-linalg-arithaddf-benchmark.log + ↳ gbench JSON → dl-op-linalg-arithaddf-benchmark.json [Success] Run of 'dl-op-linalg-arithdivf-benchmark' + ↳ stdout/stderr → dl-op-linalg-arithdivf-benchmark.log + ↳ gbench JSON → dl-op-linalg-arithdivf-benchmark.json [Success] Run of 'dl-op-linalg-arithmulf-benchmark' + ↳ stdout/stderr → dl-op-linalg-arithmulf-benchmark.log + ↳ gbench JSON → dl-op-linalg-arithmulf-benchmark.json [Success] Run of 'dl-op-linalg-arithnegf-benchmark' + ↳ stdout/stderr → dl-op-linalg-arithnegf-benchmark.log + ↳ gbench JSON → 
dl-op-linalg-arithnegf-benchmark.json [Success] Run of 'dl-op-linalg-arithsubf-benchmark' + ↳ stdout/stderr → dl-op-linalg-arithsubf-benchmark.log + ↳ gbench JSON → dl-op-linalg-arithsubf-benchmark.json [Success] Run of 'dl-op-linalg-mathfpow-benchmark' + ↳ stdout/stderr → dl-op-linalg-mathfpow-benchmark.log + ↳ gbench JSON → dl-op-linalg-mathfpow-benchmark.json [Success] Run of 'dl-op-linalg-mathrsqrt-benchmark' + ↳ stdout/stderr → dl-op-linalg-mathrsqrt-benchmark.log + ↳ gbench JSON → dl-op-linalg-mathrsqrt-benchmark.json [Success] Run of 'dl-op-linalg-mathexp-benchmark' + ↳ stdout/stderr → dl-op-linalg-mathexp-benchmark.log + ↳ gbench JSON → dl-op-linalg-mathexp-benchmark.json [Failed] Run of 'dl-op-linalg-reduceaddf-benchmark' + ↳ stdout/stderr → dl-op-linalg-reduceaddf-benchmark.log (may contain errors) [Failed] Run of 'dl-op-linalg-reducemaxf-benchmark' + ↳ stdout/stderr → dl-op-linalg-reducemaxf-benchmark.log (may contain errors) [Success] Run of 'dl-op-linalg-softmax-exp-sum-div-benchmark' + ↳ stdout/stderr → dl-op-linalg-softmax-exp-sum-div-benchmark.log + ↳ gbench JSON → dl-op-linalg-softmax-exp-sum-div-benchmark.json [Success] Run of 'dl-op-tosa-transpose-benchmark' + ↳ stdout/stderr → dl-op-tosa-transpose-benchmark.log + ↳ gbench JSON → dl-op-tosa-transpose-benchmark.json [Success] Run of 'dl-op-matmul-transpose-b-benchmark' + ↳ stdout/stderr → dl-op-matmul-transpose-b-benchmark.log + ↳ gbench JSON → dl-op-matmul-transpose-b-benchmark.json diff --git a/test_result/geminiprocessing/build.log b/test_result/geminiprocessing/build.log deleted file mode 100755 index 8473f261..00000000 --- a/test_result/geminiprocessing/build.log +++ /dev/null @@ -1,655 +0,0 @@ -[1/21] Creating directories for 'project_googlebenchmark' -[2/21] Building C object benchmarks/Gemmini/Ops/MatMulOp/CMakeFiles/ExoMatMul.dir/ExoMatmul.c.o -FAILED: benchmarks/Gemmini/Ops/MatMulOp/CMakeFiles/ExoMatMul.dir/ExoMatmul.c.o -riscv64-unknown-linux-gnu-gcc 
-I/home/buddy-complier-workspace/buddy-mlir/build/cmake/../../frontend/Interfaces -I/home/buddy-complier-workspace/buddy-mlir/build/cmake/../../thirdparty/include -I/home/buddy-complier-workspace/buddy-benchmark/benchmarks -I/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include -I/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/.. -I/home/xychen/buddy-mlir/frontend/Interfaces -O3 -DNDEBUG -MD -MT benchmarks/Gemmini/Ops/MatMulOp/CMakeFiles/ExoMatMul.dir/ExoMatmul.c.o -MF benchmarks/Gemmini/Ops/MatMulOp/CMakeFiles/ExoMatMul.dir/ExoMatmul.c.o.d -o benchmarks/Gemmini/Ops/MatMulOp/CMakeFiles/ExoMatMul.dir/ExoMatmul.c.o -c /home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c: In function '_exo_matmul_4': -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:28:47: error: macro "gemmini_extended_config_ex" requires 7 arguments, but only 6 given - 28 | gemmini_extended_config_ex(WS, 0, 0, 1, 0, 0); - | ^ -In file included from /home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:23: -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:251: note: macro "gemmini_extended_config_ex" defined here - 251 | #define gemmini_extended_config_ex(dataflow, sys_act, sys_shift, relu6_shift, A_stride, A_transpose, B_transpose) \ - | -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:28:3: error: 'gemmini_extended_config_ex' undeclared (first use in this function) - 28 | gemmini_extended_config_ex(WS, 0, 0, 1, 0, 0); - | ^~~~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:28:3: note: each undeclared identifier is reported only once for each 
function it appears in -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:35:18: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast] - 35 | int32_t *res = (int32_t*) ((uint32_t)gemm_acc_malloc (16 * 16 * 4 * 4 * sizeof(int32_t))); - | ^ -In file included from /home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:20, - from /home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:23: -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:66:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] - 66 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16)); - | ^ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' - 152 | : "r"(rs1), "r"(rs2)); \ - | ^~~ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' - 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) - | ^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:66:9: note: in expansion of macro 'gemmini_extended_preload' - 66 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 
0x40000000, (16), (16), (16), (16)); - | ^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:66:119: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] - 66 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16)); - | ^ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' - 152 | : "r"(rs1), "r"(rs2)); \ - | ^~~ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' - 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) - | ^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:66:9: note: in expansion of macro 'gemmini_extended_preload' - 66 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16)); - | ^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:67:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] - 67 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024))/16))), ~((uint32_t)0), (16), (16), 16, 16); - | ^ 
-/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' - 152 | : "r"(rs1), "r"(rs2)); \ - | ^~~ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' - 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED) - | ^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:67:9: note: in expansion of macro 'gemmini_extended_compute_preloaded' - 67 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024))/16))), ~((uint32_t)0), (16), (16), 16, 16); - | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:68:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] - 68 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16)); - | ^ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' - 152 | : "r"(rs1), "r"(rs2)); \ - | ^~~ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' - 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << 
ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) - | ^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:68:9: note: in expansion of macro 'gemmini_extended_preload' - 68 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16)); - | ^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:68:125: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] - 68 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16)); - | ^ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' - 152 | : "r"(rs1), "r"(rs2)); \ - | ^~~ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' - 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) - | ^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:68:9: note: in expansion of macro 'gemmini_extended_preload' - 68 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 256)/16))), 
(uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16)); - | ^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:69:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] - 69 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024))/16))), ~((uint32_t)0), (16), (16), 16, 16); - | ^ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' - 152 | : "r"(rs1), "r"(rs2)); \ - | ^~~ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' - 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED) - | ^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:69:9: note: in expansion of macro 'gemmini_extended_compute_preloaded' - 69 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024))/16))), ~((uint32_t)0), (16), (16), 16, 16); - | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:70:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] - 70 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), 
(16)); - | ^ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' - 152 | : "r"(rs1), "r"(rs2)); \ - | ^~~ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' - 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) - | ^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:70:9: note: in expansion of macro 'gemmini_extended_preload' - 70 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); - | ^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:70:133: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] - 70 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); - | ^ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' - 152 | : "r"(rs1), "r"(rs2)); \ - | ^~~ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' - 232 | 
ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) - | ^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:70:9: note: in expansion of macro 'gemmini_extended_preload' - 70 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); - | ^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:71:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] - 71 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024))/16))), ~((uint32_t)0), (16), (16), 16, 16); - | ^ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' - 152 | : "r"(rs1), "r"(rs2)); \ - | ^~~ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' - 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED) - | ^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:71:9: note: in expansion of macro 'gemmini_extended_compute_preloaded' - 71 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( 
((uint32_t)((uint64_t)a)) + ((i) * (1024))/16))), ~((uint32_t)0), (16), (16), 16, 16); - | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:72:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] - 72 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); - | ^ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' - 152 | : "r"(rs1), "r"(rs2)); \ - | ^~~ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' - 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) - | ^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:72:9: note: in expansion of macro 'gemmini_extended_preload' - 72 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); - | ^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:72:133: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] - 72 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 
(3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); - | ^ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' - 152 | : "r"(rs1), "r"(rs2)); \ - | ^~~ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' - 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) - | ^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:72:9: note: in expansion of macro 'gemmini_extended_preload' - 72 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); - | ^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:73:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] - 73 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024))/16))), ~((uint32_t)0), (16), (16), 16, 16); - | ^ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' - 152 | : "r"(rs1), "r"(rs2)); \ - | ^~~ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in 
expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' - 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED) - | ^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:73:9: note: in expansion of macro 'gemmini_extended_compute_preloaded' - 73 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024))/16))), ~((uint32_t)0), (16), (16), 16, 16); - | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:74:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] - 74 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16)); - | ^ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' - 152 | : "r"(rs1), "r"(rs2)); \ - | ^~~ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' - 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) - | ^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:74:9: note: in expansion of macro 'gemmini_extended_preload' - 74 | 
gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16)); - | ^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:74:126: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] - 74 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16)); - | ^ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' - 152 | : "r"(rs1), "r"(rs2)); \ - | ^~~ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' - 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) - | ^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:74:9: note: in expansion of macro 'gemmini_extended_preload' - 74 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16)); - | ^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:75:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] - 75 | 
gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + 256)/16))), ~((uint32_t)0), (16), (16), 16, 16); - | ^ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' - 152 | : "r"(rs1), "r"(rs2)); \ - | ^~~ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' - 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED) - | ^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:75:9: note: in expansion of macro 'gemmini_extended_compute_preloaded' - 75 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + 256)/16))), ~((uint32_t)0), (16), (16), 16, 16); - | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:76:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] - 76 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16)); - | ^ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' - 152 | : "r"(rs1), "r"(rs2)); \ - | ^~~ 
-/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' - 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) - | ^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:76:9: note: in expansion of macro 'gemmini_extended_preload' - 76 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16)); - | ^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:76:132: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] - 76 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16)); - | ^ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' - 152 | : "r"(rs1), "r"(rs2)); \ - | ^~~ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' - 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) - | 
^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:76:9: note: in expansion of macro 'gemmini_extended_preload' - 76 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16)); - | ^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:77:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] - 77 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + 256)/16))), ~((uint32_t)0), (16), (16), 16, 16); - | ^ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' - 152 | : "r"(rs1), "r"(rs2)); \ - | ^~~ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' - 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED) - | ^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:77:9: note: in expansion of macro 'gemmini_extended_compute_preloaded' - 77 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + 256)/16))), ~((uint32_t)0), (16), (16), 16, 16); - | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:78:34: warning: cast from 
pointer to integer of different size [-Wpointer-to-int-cast] - 78 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); - | ^ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' - 152 | : "r"(rs1), "r"(rs2)); \ - | ^~~ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' - 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) - | ^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:78:9: note: in expansion of macro 'gemmini_extended_preload' - 78 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); - | ^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:78:140: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] - 78 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); - | ^ 
-/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' - 152 | : "r"(rs1), "r"(rs2)); \ - | ^~~ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' - 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) - | ^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:78:9: note: in expansion of macro 'gemmini_extended_preload' - 78 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); - | ^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:79:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] - 79 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + 256)/16))), ~((uint32_t)0), (16), (16), 16, 16); - | ^ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' - 152 | : "r"(rs1), "r"(rs2)); \ - | ^~~ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' - 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << 
ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED) - | ^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:79:9: note: in expansion of macro 'gemmini_extended_compute_preloaded' - 79 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + 256)/16))), ~((uint32_t)0), (16), (16), 16, 16); - | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:80:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] - 80 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); - | ^ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' - 152 | : "r"(rs1), "r"(rs2)); \ - | ^~~ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' - 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) - | ^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:80:9: note: in expansion of macro 'gemmini_extended_preload' - 80 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + (3) * (256))/16))), 
(uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); - | ^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:80:140: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] - 80 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); - | ^ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' - 152 | : "r"(rs1), "r"(rs2)); \ - | ^~~ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' - 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) - | ^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:80:9: note: in expansion of macro 'gemmini_extended_preload' - 80 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); - | ^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:81:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] - 81 | 
gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + 256)/16))), ~((uint32_t)0), (16), (16), 16, 16); - | ^ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' - 152 | : "r"(rs1), "r"(rs2)); \ - | ^~~ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' - 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED) - | ^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:81:9: note: in expansion of macro 'gemmini_extended_compute_preloaded' - 81 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + 256)/16))), ~((uint32_t)0), (16), (16), 16, 16); - | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:82:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] - 82 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16)); - | ^ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' - 152 | : "r"(rs1), "r"(rs2)); \ - | ^~~ 
-/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' - 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) - | ^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:82:9: note: in expansion of macro 'gemmini_extended_preload' - 82 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16)); - | ^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:82:134: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] - 82 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16)); - | ^ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' - 152 | : "r"(rs1), "r"(rs2)); \ - | ^~~ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' - 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) - | ^~~~~~~~~~~~~~~~~~~~~~~~ 
-/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:82:9: note: in expansion of macro 'gemmini_extended_preload' - 82 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16)); - | ^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:83:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] - 83 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (2) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16); - | ^ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' - 152 | : "r"(rs1), "r"(rs2)); \ - | ^~~ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' - 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED) - | ^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:83:9: note: in expansion of macro 'gemmini_extended_compute_preloaded' - 83 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (2) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16); - | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:84:34: warning: cast from pointer to 
integer of different size [-Wpointer-to-int-cast] - 84 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16)); - | ^ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' - 152 | : "r"(rs1), "r"(rs2)); \ - | ^~~ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' - 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) - | ^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:84:9: note: in expansion of macro 'gemmini_extended_preload' - 84 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16)); - | ^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:84:140: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] - 84 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16)); - | ^ 
-/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' - 152 | : "r"(rs1), "r"(rs2)); \ - | ^~~ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' - 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) - | ^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:84:9: note: in expansion of macro 'gemmini_extended_preload' - 84 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16)); - | ^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:85:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] - 85 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (2) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16); - | ^ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' - 152 | : "r"(rs1), "r"(rs2)); \ - | ^~~ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' - 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << 
ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED) - | ^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:85:9: note: in expansion of macro 'gemmini_extended_compute_preloaded' - 85 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (2) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16); - | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:86:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] - 86 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); - | ^ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' - 152 | : "r"(rs1), "r"(rs2)); \ - | ^~~ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' - 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) - | ^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:86:9: note: in expansion of macro 'gemmini_extended_preload' - 86 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + (2) * (256))/16))), 
(uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); - | ^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:86:148: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] - 86 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); - | ^ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' - 152 | : "r"(rs1), "r"(rs2)); \ - | ^~~ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' - 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) - | ^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:86:9: note: in expansion of macro 'gemmini_extended_preload' - 86 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); - | ^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:87:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] - 87 | 
gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (2) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16); - | ^ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' - 152 | : "r"(rs1), "r"(rs2)); \ - | ^~~ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' - 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED) - | ^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:87:9: note: in expansion of macro 'gemmini_extended_compute_preloaded' - 87 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (2) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16); - | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:88:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] - 88 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); - | ^ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' - 152 | : "r"(rs1), "r"(rs2)); \ - | ^~~ 
-/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' - 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) - | ^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:88:9: note: in expansion of macro 'gemmini_extended_preload' - 88 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); - | ^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:88:148: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] - 88 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); - | ^ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' - 152 | : "r"(rs1), "r"(rs2)); \ - | ^~~ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' - 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | 
(uint64_t)(C), k_PRELOAD) - | ^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:88:9: note: in expansion of macro 'gemmini_extended_preload' - 88 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); - | ^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:89:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] - 89 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (2) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16); - | ^ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' - 152 | : "r"(rs1), "r"(rs2)); \ - | ^~~ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' - 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED) - | ^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:89:9: note: in expansion of macro 'gemmini_extended_compute_preloaded' - 89 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (2) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16); - | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 
-/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:90:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] - 90 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16)); - | ^ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' - 152 | : "r"(rs1), "r"(rs2)); \ - | ^~~ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' - 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) - | ^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:90:9: note: in expansion of macro 'gemmini_extended_preload' - 90 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16)); - | ^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:90:134: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] - 90 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16)); - | ^ 
-/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' - 152 | : "r"(rs1), "r"(rs2)); \ - | ^~~ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' - 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) - | ^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:90:9: note: in expansion of macro 'gemmini_extended_preload' - 90 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16)); - | ^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:91:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] - 91 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (3) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16); - | ^ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' - 152 | : "r"(rs1), "r"(rs2)); \ - | ^~~ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' - 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | 
(uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED) - | ^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:91:9: note: in expansion of macro 'gemmini_extended_compute_preloaded' - 91 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (3) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16); - | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:92:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] - 92 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16)); - | ^ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' - 152 | : "r"(rs1), "r"(rs2)); \ - | ^~~ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' - 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) - | ^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:92:9: note: in expansion of macro 'gemmini_extended_preload' - 92 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( 
((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16)); - | ^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:92:140: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] - 92 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16)); - | ^ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' - 152 | : "r"(rs1), "r"(rs2)); \ - | ^~~ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' - 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) - | ^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:92:9: note: in expansion of macro 'gemmini_extended_preload' - 92 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16)); - | ^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:93:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] - 93 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * 
(1024) + (3) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16); - | ^ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' - 152 | : "r"(rs1), "r"(rs2)); \ - | ^~~ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' - 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED) - | ^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:93:9: note: in expansion of macro 'gemmini_extended_compute_preloaded' - 93 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (3) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16); - | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:94:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] - 94 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); - | ^ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' - 152 | : "r"(rs1), "r"(rs2)); \ - | ^~~ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 
'ROCC_INSTRUCTION_RS1_RS2' - 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) - | ^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:94:9: note: in expansion of macro 'gemmini_extended_preload' - 94 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); - | ^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:94:148: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] - 94 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); - | ^ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' - 152 | : "r"(rs1), "r"(rs2)); \ - | ^~~ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' - 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) - | ^~~~~~~~~~~~~~~~~~~~~~~~ 
-/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:94:9: note: in expansion of macro 'gemmini_extended_preload' - 94 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); - | ^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:95:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] - 95 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (3) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16); - | ^ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' - 152 | : "r"(rs1), "r"(rs2)); \ - | ^~~ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' - 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED) - | ^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:95:9: note: in expansion of macro 'gemmini_extended_compute_preloaded' - 95 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (3) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16); - | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:96:34: warning: 
cast from pointer to integer of different size [-Wpointer-to-int-cast] - 96 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); - | ^ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' - 152 | : "r"(rs1), "r"(rs2)); \ - | ^~~ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' - 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) - | ^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:96:9: note: in expansion of macro 'gemmini_extended_preload' - 96 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); - | ^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:96:148: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] - 96 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); - | ^ 
-/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' - 152 | : "r"(rs1), "r"(rs2)); \ - | ^~~ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' - 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) - | ^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:96:9: note: in expansion of macro 'gemmini_extended_preload' - 96 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); - | ^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:97:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] - 97 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (3) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16); - | ^ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' - 152 | : "r"(rs1), "r"(rs2)); \ - | ^~~ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' - 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | 
((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED) - | ^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:97:9: note: in expansion of macro 'gemmini_extended_compute_preloaded' - 97 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (3) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16); - | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:98:89: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] - 98 | gemmini_extended_mvout( ((uint64_t) &C[(16 * i + 3136 * io) * (256) + 64 * j]), (uint32_t) &*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16)), (16), (16) ); - | ^ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' - 152 | : "r"(rs1), "r"(rs2)); \ - | ^~~ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:212:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' - 212 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, dram_addr, ((uint64_t)(rows) << (ADDR_LEN + 16)) | ((uint64_t)(cols) << ADDR_LEN) | (uint64_t)(spad_addr), k_MVOUT) - | ^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:98:9: note: in expansion of macro 'gemmini_extended_mvout' - 98 | gemmini_extended_mvout( ((uint64_t) &C[(16 * i + 3136 * io) * (256) + 64 * j]), (uint32_t) &*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16)), (16), (16) ); - | ^~~~~~~~~~~~~~~~~~~~~~ 
-/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:99:94: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] - 99 | gemmini_extended_mvout( ((uint64_t) &C[(16 * i + 3136 * io) * (256) + 16 + 64 * j]), (uint32_t) &*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16)), (16), (16) ); - | ^ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' - 152 | : "r"(rs1), "r"(rs2)); \ - | ^~~ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:212:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' - 212 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, dram_addr, ((uint64_t)(rows) << (ADDR_LEN + 16)) | ((uint64_t)(cols) << ADDR_LEN) | (uint64_t)(spad_addr), k_MVOUT) - | ^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:99:9: note: in expansion of macro 'gemmini_extended_mvout' - 99 | gemmini_extended_mvout( ((uint64_t) &C[(16 * i + 3136 * io) * (256) + 16 + 64 * j]), (uint32_t) &*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16)), (16), (16) ); - | ^~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:100:94: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] - 100 | gemmini_extended_mvout( ((uint64_t) &C[(16 * i + 3136 * io) * (256) + 32 + 64 * j]), (uint32_t) &*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16)), (16), (16) ); - | ^ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' - 152 | : "r"(rs1), "r"(rs2)); \ - | ^~~ 
-/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:212:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' - 212 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, dram_addr, ((uint64_t)(rows) << (ADDR_LEN + 16)) | ((uint64_t)(cols) << ADDR_LEN) | (uint64_t)(spad_addr), k_MVOUT) - | ^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:100:9: note: in expansion of macro 'gemmini_extended_mvout' - 100 | gemmini_extended_mvout( ((uint64_t) &C[(16 * i + 3136 * io) * (256) + 32 + 64 * j]), (uint32_t) &*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16)), (16), (16) ); - | ^~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:101:94: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] - 101 | gemmini_extended_mvout( ((uint64_t) &C[(16 * i + 3136 * io) * (256) + 48 + 64 * j]), (uint32_t) &*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16)), (16), (16) ); - | ^ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' - 152 | : "r"(rs1), "r"(rs2)); \ - | ^~~ -/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:212:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' - 212 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, dram_addr, ((uint64_t)(rows) << (ADDR_LEN + 16)) | ((uint64_t)(cols) << ADDR_LEN) | (uint64_t)(spad_addr), k_MVOUT) - | ^~~~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:101:9: note: in expansion of macro 'gemmini_extended_mvout' - 101 | gemmini_extended_mvout( ((uint64_t) &C[(16 * i + 3136 * io) * (256) + 48 + 64 * j]), (uint32_t) 
&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16)), (16), (16) ); - | ^~~~~~~~~~~~~~~~~~~~~~ -/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:105:17: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] - 105 | gemm_acc_free((uint32_t)(res)); - | ^ -[3/21] Generating buddy_matmul.o -[4/21] Building CXX object benchmarks/Gemmini/ResNet-101/CMakeFiles/CRunnerUtils.dir/CRunnerUtils.cpp.o -[5/21] Performing download step (git clone) for 'project_googlebenchmark' -Cloning into 'project_googlebenchmark'... -HEAD is now at f91b6b4 bump version to 1.6 in preparation for release -[6/21] Generating resnet-101.o -ninja: build stopped: subcommand failed. diff --git a/test_result/geminiprocessing/cmake_configure.log b/test_result/geminiprocessing/cmake_configure.log deleted file mode 100755 index a3a42f37..00000000 --- a/test_result/geminiprocessing/cmake_configure.log +++ /dev/null @@ -1,37 +0,0 @@ --- The CXX compiler identification is GNU 9.2.0 --- The C compiler identification is GNU 9.2.0 --- Detecting CXX compiler ABI info --- Detecting CXX compiler ABI info - done --- Check for working CXX compiler: /home/buddy-complier-workspace/chipyard/.conda-env/esp-tools/bin/riscv64-unknown-linux-gnu-g++ - skipped --- Detecting CXX compile features --- Detecting CXX compile features - done --- Detecting C compiler ABI info --- Detecting C compiler ABI info - done --- Check for working C compiler: /home/buddy-complier-workspace/chipyard/.conda-env/esp-tools/bin/riscv64-unknown-linux-gnu-gcc - skipped --- Detecting C compile features --- Detecting C compile features - done --- Configuring Target Architecture: avx512f --- Configuring Target Triple: x86_64-unknown-linux-gnu --- Configuring benchmarks: google --- Performing Test CMAKE_HAVE_LIBC_PTHREAD --- Performing Test CMAKE_HAVE_LIBC_PTHREAD - Failed --- Looking for pthread_create in pthreads --- Looking for pthread_create in 
pthreads - not found --- Looking for pthread_create in pthread --- Looking for pthread_create in pthread - found --- Found Threads: TRUE --- Performing Test HAVE_SSE --- Performing Test HAVE_SSE - Failed --- SSE support - no --- Performing Test HAVE_AVX2 --- Performing Test HAVE_AVX2 - Failed --- AVX2 support - no --- Performing Test HAVE_AVX512 --- Performing Test HAVE_AVX512 - Failed --- AVX512 support - no --- Performing Test HAVE_NEON --- Performing Test HAVE_NEON - Failed --- Arm Neon support - no --- Configuring done --- Generating done --- Build files have been written to: /home/buddy-complier-workspace/buddy-benchmark/build diff --git a/test_result/imageprocessing/image-processing-result.log b/test_result/imageprocessing/image-processing-result.log new file mode 100644 index 00000000..0ee3637b --- /dev/null +++ b/test_result/imageprocessing/image-processing-result.log @@ -0,0 +1,179 @@ +Benchmark results - Mon May 26 20:28:28 UTC 2025 +Testing SSE support +SSE is supported. +Running image-processing-benchmark for SSE +Running: ../benchmarks/ImageProcessing/Images/YuTu.png prewittKernelAlign random3x3KernelAlignInt CONSTANT_PADDING +terminate called after throwing an instance of 'std::invalid_argument' + what(): Wrong format of command line arguments. +Correct format is ./image-processing-benchmark + where image path provides path of the image to be processed, kernel name denotes the name of desired kernel as specified in kernelmorph denotes the kernel to be used for morphological operationsinclude/ImageProcessing/Kernels.h and Boundary options available are CONSTANT_PADDING, REPLICATE_PADDING. + +Running: ../benchmarks/ImageProcessing/Images/YuTu.png prewittKernelAlign random3x3KernelAlignInt REPLICATE_PADDING +terminate called after throwing an instance of 'std::invalid_argument' + what(): Wrong format of command line arguments. 
+Correct format is ./image-processing-benchmark + where image path provides path of the image to be processed, kernel name denotes the name of desired kernel as specified in kernelmorph denotes the kernel to be used for morphological operationsinclude/ImageProcessing/Kernels.h and Boundary options available are CONSTANT_PADDING, REPLICATE_PADDING. + +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel3x3KernelAlign random3x3KernelAlignInt CONSTANT_PADDING +terminate called after throwing an instance of 'std::invalid_argument' + what(): Wrong format of command line arguments. +Correct format is ./image-processing-benchmark + where image path provides path of the image to be processed, kernel name denotes the name of desired kernel as specified in kernelmorph denotes the kernel to be used for morphological operationsinclude/ImageProcessing/Kernels.h and Boundary options available are CONSTANT_PADDING, REPLICATE_PADDING. + +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel3x3KernelAlign random3x3KernelAlignInt REPLICATE_PADDING +terminate called after throwing an instance of 'std::invalid_argument' + what(): Wrong format of command line arguments. +Correct format is ./image-processing-benchmark + where image path provides path of the image to be processed, kernel name denotes the name of desired kernel as specified in kernelmorph denotes the kernel to be used for morphological operationsinclude/ImageProcessing/Kernels.h and Boundary options available are CONSTANT_PADDING, REPLICATE_PADDING. + +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel5x5KernelAlign random3x3KernelAlignInt CONSTANT_PADDING +terminate called after throwing an instance of 'std::invalid_argument' + what(): Wrong format of command line arguments. 
+Correct format is ./image-processing-benchmark + where image path provides path of the image to be processed, kernel name denotes the name of desired kernel as specified in kernelmorph denotes the kernel to be used for morphological operationsinclude/ImageProcessing/Kernels.h and Boundary options available are CONSTANT_PADDING, REPLICATE_PADDING. + +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel5x5KernelAlign random3x3KernelAlignInt REPLICATE_PADDING +terminate called after throwing an instance of 'std::invalid_argument' + what(): Wrong format of command line arguments. +Correct format is ./image-processing-benchmark + where image path provides path of the image to be processed, kernel name denotes the name of desired kernel as specified in kernelmorph denotes the kernel to be used for morphological operationsinclude/ImageProcessing/Kernels.h and Boundary options available are CONSTANT_PADDING, REPLICATE_PADDING. + +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel7x7KernelAlign random3x3KernelAlignInt CONSTANT_PADDING +terminate called after throwing an instance of 'std::invalid_argument' + what(): Wrong format of command line arguments. +Correct format is ./image-processing-benchmark + where image path provides path of the image to be processed, kernel name denotes the name of desired kernel as specified in kernelmorph denotes the kernel to be used for morphological operationsinclude/ImageProcessing/Kernels.h and Boundary options available are CONSTANT_PADDING, REPLICATE_PADDING. + +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel7x7KernelAlign random3x3KernelAlignInt REPLICATE_PADDING +terminate called after throwing an instance of 'std::invalid_argument' + what(): Wrong format of command line arguments. 
+Correct format is ./image-processing-benchmark + where image path provides path of the image to be processed, kernel name denotes the name of desired kernel as specified in kernelmorph denotes the kernel to be used for morphological operationsinclude/ImageProcessing/Kernels.h and Boundary options available are CONSTANT_PADDING, REPLICATE_PADDING. + +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel9x9KernelAlign random3x3KernelAlignInt CONSTANT_PADDING +terminate called after throwing an instance of 'std::invalid_argument' + what(): Wrong format of command line arguments. +Correct format is ./image-processing-benchmark + where image path provides path of the image to be processed, kernel name denotes the name of desired kernel as specified in kernelmorph denotes the kernel to be used for morphological operationsinclude/ImageProcessing/Kernels.h and Boundary options available are CONSTANT_PADDING, REPLICATE_PADDING. + +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel9x9KernelAlign random3x3KernelAlignInt REPLICATE_PADDING +terminate called after throwing an instance of 'std::invalid_argument' + what(): Wrong format of command line arguments. +Correct format is ./image-processing-benchmark + where image path provides path of the image to be processed, kernel name denotes the name of desired kernel as specified in kernelmorph denotes the kernel to be used for morphological operationsinclude/ImageProcessing/Kernels.h and Boundary options available are CONSTANT_PADDING, REPLICATE_PADDING. + +Running: ../benchmarks/ImageProcessing/Images/YuTu.png laplacianKernelAlign random3x3KernelAlignInt CONSTANT_PADDING +terminate called after throwing an instance of 'std::invalid_argument' + what(): Wrong format of command line arguments. 
+Correct format is ./image-processing-benchmark + where image path provides path of the image to be processed, kernel name denotes the name of desired kernel as specified in kernelmorph denotes the kernel to be used for morphological operationsinclude/ImageProcessing/Kernels.h and Boundary options available are CONSTANT_PADDING, REPLICATE_PADDING. + +Running: ../benchmarks/ImageProcessing/Images/YuTu.png laplacianKernelAlign random3x3KernelAlignInt REPLICATE_PADDING +terminate called after throwing an instance of 'std::invalid_argument' + what(): Wrong format of command line arguments. +Correct format is ./image-processing-benchmark + where image path provides path of the image to be processed, kernel name denotes the name of desired kernel as specified in kernelmorph denotes the kernel to be used for morphological operationsinclude/ImageProcessing/Kernels.h and Boundary options available are CONSTANT_PADDING, REPLICATE_PADDING. + +Running: ../benchmarks/ImageProcessing/Images/YuTu.png logKernelAlign random3x3KernelAlignInt CONSTANT_PADDING +terminate called after throwing an instance of 'std::invalid_argument' + what(): Wrong format of command line arguments. +Correct format is ./image-processing-benchmark + where image path provides path of the image to be processed, kernel name denotes the name of desired kernel as specified in kernelmorph denotes the kernel to be used for morphological operationsinclude/ImageProcessing/Kernels.h and Boundary options available are CONSTANT_PADDING, REPLICATE_PADDING. + +Running: ../benchmarks/ImageProcessing/Images/YuTu.png logKernelAlign random3x3KernelAlignInt REPLICATE_PADDING +terminate called after throwing an instance of 'std::invalid_argument' + what(): Wrong format of command line arguments. 
+Correct format is ./image-processing-benchmark + where image path provides path of the image to be processed, kernel name denotes the name of desired kernel as specified in kernelmorph denotes the kernel to be used for morphological operationsinclude/ImageProcessing/Kernels.h and Boundary options available are CONSTANT_PADDING, REPLICATE_PADDING. + +Testing AVX2 support +AVX2 is supported. +Running image-processing-benchmark for AVX2 +Running: ../benchmarks/ImageProcessing/Images/YuTu.png prewittKernelAlign random3x3KernelAlignInt CONSTANT_PADDING +terminate called after throwing an instance of 'std::invalid_argument' + what(): Wrong format of command line arguments. +Correct format is ./image-processing-benchmark + where image path provides path of the image to be processed, kernel name denotes the name of desired kernel as specified in kernelmorph denotes the kernel to be used for morphological operationsinclude/ImageProcessing/Kernels.h and Boundary options available are CONSTANT_PADDING, REPLICATE_PADDING. + +Running: ../benchmarks/ImageProcessing/Images/YuTu.png prewittKernelAlign random3x3KernelAlignInt REPLICATE_PADDING +terminate called after throwing an instance of 'std::invalid_argument' + what(): Wrong format of command line arguments. +Correct format is ./image-processing-benchmark + where image path provides path of the image to be processed, kernel name denotes the name of desired kernel as specified in kernelmorph denotes the kernel to be used for morphological operationsinclude/ImageProcessing/Kernels.h and Boundary options available are CONSTANT_PADDING, REPLICATE_PADDING. + +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel3x3KernelAlign random3x3KernelAlignInt CONSTANT_PADDING +terminate called after throwing an instance of 'std::invalid_argument' + what(): Wrong format of command line arguments. 
+Correct format is ./image-processing-benchmark + where image path provides path of the image to be processed, kernel name denotes the name of desired kernel as specified in kernelmorph denotes the kernel to be used for morphological operationsinclude/ImageProcessing/Kernels.h and Boundary options available are CONSTANT_PADDING, REPLICATE_PADDING. + +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel3x3KernelAlign random3x3KernelAlignInt REPLICATE_PADDING +terminate called after throwing an instance of 'std::invalid_argument' + what(): Wrong format of command line arguments. +Correct format is ./image-processing-benchmark + where image path provides path of the image to be processed, kernel name denotes the name of desired kernel as specified in kernelmorph denotes the kernel to be used for morphological operationsinclude/ImageProcessing/Kernels.h and Boundary options available are CONSTANT_PADDING, REPLICATE_PADDING. + +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel5x5KernelAlign random3x3KernelAlignInt CONSTANT_PADDING +terminate called after throwing an instance of 'std::invalid_argument' + what(): Wrong format of command line arguments. +Correct format is ./image-processing-benchmark + where image path provides path of the image to be processed, kernel name denotes the name of desired kernel as specified in kernelmorph denotes the kernel to be used for morphological operationsinclude/ImageProcessing/Kernels.h and Boundary options available are CONSTANT_PADDING, REPLICATE_PADDING. + +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel5x5KernelAlign random3x3KernelAlignInt REPLICATE_PADDING +terminate called after throwing an instance of 'std::invalid_argument' + what(): Wrong format of command line arguments. 
+Correct format is ./image-processing-benchmark + where image path provides path of the image to be processed, kernel name denotes the name of desired kernel as specified in kernelmorph denotes the kernel to be used for morphological operationsinclude/ImageProcessing/Kernels.h and Boundary options available are CONSTANT_PADDING, REPLICATE_PADDING. + +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel7x7KernelAlign random3x3KernelAlignInt CONSTANT_PADDING +terminate called after throwing an instance of 'std::invalid_argument' + what(): Wrong format of command line arguments. +Correct format is ./image-processing-benchmark + where image path provides path of the image to be processed, kernel name denotes the name of desired kernel as specified in kernelmorph denotes the kernel to be used for morphological operationsinclude/ImageProcessing/Kernels.h and Boundary options available are CONSTANT_PADDING, REPLICATE_PADDING. + +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel7x7KernelAlign random3x3KernelAlignInt REPLICATE_PADDING +terminate called after throwing an instance of 'std::invalid_argument' + what(): Wrong format of command line arguments. +Correct format is ./image-processing-benchmark + where image path provides path of the image to be processed, kernel name denotes the name of desired kernel as specified in kernelmorph denotes the kernel to be used for morphological operationsinclude/ImageProcessing/Kernels.h and Boundary options available are CONSTANT_PADDING, REPLICATE_PADDING. + +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel9x9KernelAlign random3x3KernelAlignInt CONSTANT_PADDING +terminate called after throwing an instance of 'std::invalid_argument' + what(): Wrong format of command line arguments. 
+Correct format is ./image-processing-benchmark + where image path provides path of the image to be processed, kernel name denotes the name of desired kernel as specified in kernelmorph denotes the kernel to be used for morphological operationsinclude/ImageProcessing/Kernels.h and Boundary options available are CONSTANT_PADDING, REPLICATE_PADDING. + +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel9x9KernelAlign random3x3KernelAlignInt REPLICATE_PADDING +terminate called after throwing an instance of 'std::invalid_argument' + what(): Wrong format of command line arguments. +Correct format is ./image-processing-benchmark + where image path provides path of the image to be processed, kernel name denotes the name of desired kernel as specified in kernelmorph denotes the kernel to be used for morphological operationsinclude/ImageProcessing/Kernels.h and Boundary options available are CONSTANT_PADDING, REPLICATE_PADDING. + +Running: ../benchmarks/ImageProcessing/Images/YuTu.png laplacianKernelAlign random3x3KernelAlignInt CONSTANT_PADDING +terminate called after throwing an instance of 'std::invalid_argument' + what(): Wrong format of command line arguments. +Correct format is ./image-processing-benchmark + where image path provides path of the image to be processed, kernel name denotes the name of desired kernel as specified in kernelmorph denotes the kernel to be used for morphological operationsinclude/ImageProcessing/Kernels.h and Boundary options available are CONSTANT_PADDING, REPLICATE_PADDING. + +Running: ../benchmarks/ImageProcessing/Images/YuTu.png laplacianKernelAlign random3x3KernelAlignInt REPLICATE_PADDING +terminate called after throwing an instance of 'std::invalid_argument' + what(): Wrong format of command line arguments. 
+Correct format is ./image-processing-benchmark + where image path provides path of the image to be processed, kernel name denotes the name of desired kernel as specified in kernelmorph denotes the kernel to be used for morphological operationsinclude/ImageProcessing/Kernels.h and Boundary options available are CONSTANT_PADDING, REPLICATE_PADDING. + +Running: ../benchmarks/ImageProcessing/Images/YuTu.png logKernelAlign random3x3KernelAlignInt CONSTANT_PADDING +terminate called after throwing an instance of 'std::invalid_argument' + what(): Wrong format of command line arguments. +Correct format is ./image-processing-benchmark + where image path provides path of the image to be processed, kernel name denotes the name of desired kernel as specified in kernelmorph denotes the kernel to be used for morphological operationsinclude/ImageProcessing/Kernels.h and Boundary options available are CONSTANT_PADDING, REPLICATE_PADDING. + +Running: ../benchmarks/ImageProcessing/Images/YuTu.png logKernelAlign random3x3KernelAlignInt REPLICATE_PADDING +terminate called after throwing an instance of 'std::invalid_argument' + what(): Wrong format of command line arguments. +Correct format is ./image-processing-benchmark + where image path provides path of the image to be processed, kernel name denotes the name of desired kernel as specified in kernelmorph denotes the kernel to be used for morphological operationsinclude/ImageProcessing/Kernels.h and Boundary options available are CONSTANT_PADDING, REPLICATE_PADDING. + +Testing AVX512 support +CPU does not support AVX512. +Testing NEON support +CPU does not support NEON. 
diff --git a/test_result/vectorization/vectorization_matrix.json b/test_result/vectorization/vectorization_matrix.json index 0c3e59ca..a8af91a1 100644 --- a/test_result/vectorization/vectorization_matrix.json +++ b/test_result/vectorization/vectorization_matrix.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-05-26T20:00:05+00:00", + "date": "2025-05-26T20:28:53+00:00", "host_name": "4ed4bacfe45d", "executable": "./vectorization-matrix-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [26.4492,18.2622,10.6772], + "load_avg": [2.49609,3.41797,6.28662], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 36916854, - "real_time": 1.9063199384214371e+01, - "cpu_time": 1.9060853641537282e+01, + "iterations": 36693381, + "real_time": 1.9134156139629500e+01, + "cpu_time": 1.9131720268568333e+01, "time_unit": "ns" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 33686586, - "real_time": 2.0769743381568315e+01, - "cpu_time": 2.0767210010536537e+01, + "iterations": 33941913, + "real_time": 2.0856437909828337e+01, + "cpu_time": 2.0853729811870071e+01, "time_unit": "ns" } ] diff --git a/test_result/vectorization/vectorization_result.log b/test_result/vectorization/vectorization_result.log index 8a4c65bb..f59d0d3e 100755 --- a/test_result/vectorization/vectorization_result.log +++ b/test_result/vectorization/vectorization_result.log @@ -1,4 +1,4 @@ -Vectorization Benchmark - Mon May 26 19:59:59 UTC 2025 +Vectorization Benchmark - Mon May 26 20:28:47 UTC 2025 [Info] Starting vectorization-matrix-benchmark build... [Info] Running CMake configuration... 
-- The CXX compiler identification is GNU 11.4.0 @@ -128,23 +128,23 @@ Call Stack (most recent call first): -- Generating done -- Build files have been written to: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/src/project_googlebenchmark-build [10/17] Performing build step for 'project_googlebenchmark' -[1/22] Building CXX object src/CMakeFiles/benchmark.dir/colorprint.cc.o -[2/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark_name.cc.o -[3/22] Building CXX object src/CMakeFiles/benchmark_main.dir/benchmark_main.cc.o +[1/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark_name.cc.o +[2/22] Building CXX object src/CMakeFiles/benchmark_main.dir/benchmark_main.cc.o +[3/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark_api_internal.cc.o [4/22] Building CXX object src/CMakeFiles/benchmark.dir/sleep.cc.o -[5/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark_api_internal.cc.o -[6/22] Building CXX object src/CMakeFiles/benchmark.dir/counter.cc.o -[7/22] Building CXX object src/CMakeFiles/benchmark.dir/perf_counters.cc.o -[8/22] Building CXX object src/CMakeFiles/benchmark.dir/timers.cc.o +[5/22] Building CXX object src/CMakeFiles/benchmark.dir/perf_counters.cc.o +[6/22] Building CXX object src/CMakeFiles/benchmark.dir/colorprint.cc.o +[7/22] Building CXX object src/CMakeFiles/benchmark.dir/timers.cc.o +[8/22] Building CXX object src/CMakeFiles/benchmark.dir/counter.cc.o [9/22] Building CXX object src/CMakeFiles/benchmark.dir/reporter.cc.o -[10/22] Building CXX object src/CMakeFiles/benchmark.dir/commandlineflags.cc.o -[11/22] Building CXX object src/CMakeFiles/benchmark.dir/complexity.cc.o +[10/22] Building CXX object src/CMakeFiles/benchmark.dir/console_reporter.cc.o +[11/22] Building CXX object src/CMakeFiles/benchmark.dir/commandlineflags.cc.o [12/22] Building CXX object src/CMakeFiles/benchmark.dir/string_util.cc.o [13/22] Building CXX object src/CMakeFiles/benchmark.dir/csv_reporter.cc.o 
-[14/22] Building CXX object src/CMakeFiles/benchmark.dir/console_reporter.cc.o -[15/22] Building CXX object src/CMakeFiles/benchmark.dir/json_reporter.cc.o -[16/22] Building CXX object src/CMakeFiles/benchmark.dir/sysinfo.cc.o -[17/22] Building CXX object src/CMakeFiles/benchmark.dir/statistics.cc.o +[14/22] Building CXX object src/CMakeFiles/benchmark.dir/json_reporter.cc.o +[15/22] Building CXX object src/CMakeFiles/benchmark.dir/sysinfo.cc.o +[16/22] Building CXX object src/CMakeFiles/benchmark.dir/statistics.cc.o +[17/22] Building CXX object src/CMakeFiles/benchmark.dir/complexity.cc.o [18/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark_runner.cc.o [19/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark.cc.o [20/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark_register.cc.o @@ -180,7 +180,7 @@ Call Stack (most recent call first): [16/17] Building CXX object benchmarks/Vectorization/CMakeFiles/vectorization-matrix-benchmark.dir/MLIRMatVecBenchmark.cpp.o [17/17] Linking CXX executable bin/vectorization-matrix-benchmark [Info] Running vectorization-matrix-benchmark... -2025-05-26T20:00:05+00:00 +2025-05-26T20:28:53+00:00 Running ./vectorization-matrix-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -188,13 +188,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 26.45, 18.26, 10.68 +Load Average: 2.50, 3.42, 6.29 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
-------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------- -MLIR_MatMul/1 19.1 ns 19.1 ns 36916854 -MLIR_MatVec/1 20.8 ns 20.8 ns 33686586 +MLIR_MatMul/1 19.1 ns 19.1 ns 36693381 +MLIR_MatVec/1 20.9 ns 20.9 ns 33941913 -------------------------------------------------------- MLIR_MatMul: MLIR MatMul Operation + Nested Loop [ 18 18 18 18 18 18 18 18 18 18 ] From c170ccf30df770ed2f621105b098a0fab79e0b86 Mon Sep 17 00:00:00 2001 From: Q Liu <52538137+LIUQyou@users.noreply.github.com> Date: Mon, 26 May 2025 23:38:08 +0200 Subject: [PATCH 18/52] update web --- scripts/logs2html.py | 1 + site/deeplearning/dl-layer-ffn-benchmark.html | 36 ++++++++++++++ .../dl-layer-rmsnorm-benchmark.html | 36 ++++++++++++++ .../dl-layer-selfattention-benchmark.html | 36 ++++++++++++++ .../dl-model-lenet-benchmark.html | 37 ++++++++++++++ .../dl-model-mobilenetv3-benchmark.html | 37 ++++++++++++++ .../dl-model-resnet18-benchmark.html | 36 ++++++++++++++ .../dl-model-tinyllama-benchmark.html | 38 +++++++++++++++ .../dl-model-whisper-benchmark.html | 37 ++++++++++++++ .../dl-op-linalg-arithaddf-benchmark.html | 37 ++++++++++++++ .../dl-op-linalg-arithdivf-benchmark.html | 37 ++++++++++++++ .../dl-op-linalg-arithmulf-benchmark.html | 37 ++++++++++++++ .../dl-op-linalg-arithnegf-benchmark.html | 37 ++++++++++++++ .../dl-op-linalg-arithsubf-benchmark.html | 37 ++++++++++++++ .../dl-op-linalg-batch-matmul-benchmark.html | 48 +++++++++++++++++++ ...-op-linalg-conv2d-nchw-fchw-benchmark.html | 37 ++++++++++++++ ...-op-linalg-conv2d-nhwc-fhwc-benchmark.html | 41 ++++++++++++++++ ...-op-linalg-conv2d-nhwc-hwcf-benchmark.html | 37 ++++++++++++++ ...-depthwise-conv-2d-nhwc-hwc-benchmark.html | 38 +++++++++++++++ .../dl-op-linalg-mathexp-benchmark.html | 37 ++++++++++++++ .../dl-op-linalg-mathfpow-benchmark.html | 37 ++++++++++++++ .../dl-op-linalg-mathrsqrt-benchmark.html | 37 ++++++++++++++ 
.../dl-op-linalg-matmul-benchmark.html | 43 +++++++++++++++++ ...-op-linalg-pooling-nhwc-sum-benchmark.html | 37 ++++++++++++++ ...-linalg-softmax-exp-sum-div-benchmark.html | 37 ++++++++++++++ .../dl-op-matmul-transpose-b-benchmark.html | 41 ++++++++++++++++ .../dl-op-tosa-transpose-benchmark.html | 35 ++++++++++++++ site/index.html | 26 ++++++++++ site/vectorization/vectorization_matrix.html | 6 +-- .../dl-op-linalg-reduceaddf-benchmark.json | 38 --------------- .../dl-op-linalg-reducemaxf-benchmark.json | 38 --------------- .../image-processing-result.log | 2 +- .../vectorization/vectorization_matrix.json | 16 +++---- .../vectorization/vectorization_result.log | 44 ++++++++--------- 34 files changed, 1044 insertions(+), 110 deletions(-) create mode 100644 site/deeplearning/dl-layer-ffn-benchmark.html create mode 100644 site/deeplearning/dl-layer-rmsnorm-benchmark.html create mode 100644 site/deeplearning/dl-layer-selfattention-benchmark.html create mode 100644 site/deeplearning/dl-model-lenet-benchmark.html create mode 100644 site/deeplearning/dl-model-mobilenetv3-benchmark.html create mode 100644 site/deeplearning/dl-model-resnet18-benchmark.html create mode 100644 site/deeplearning/dl-model-tinyllama-benchmark.html create mode 100644 site/deeplearning/dl-model-whisper-benchmark.html create mode 100644 site/deeplearning/dl-op-linalg-arithaddf-benchmark.html create mode 100644 site/deeplearning/dl-op-linalg-arithdivf-benchmark.html create mode 100644 site/deeplearning/dl-op-linalg-arithmulf-benchmark.html create mode 100644 site/deeplearning/dl-op-linalg-arithnegf-benchmark.html create mode 100644 site/deeplearning/dl-op-linalg-arithsubf-benchmark.html create mode 100644 site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html create mode 100644 site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html create mode 100644 site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html create mode 100644 
site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html create mode 100644 site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html create mode 100644 site/deeplearning/dl-op-linalg-mathexp-benchmark.html create mode 100644 site/deeplearning/dl-op-linalg-mathfpow-benchmark.html create mode 100644 site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html create mode 100644 site/deeplearning/dl-op-linalg-matmul-benchmark.html create mode 100644 site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html create mode 100644 site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html create mode 100644 site/deeplearning/dl-op-matmul-transpose-b-benchmark.html create mode 100644 site/deeplearning/dl-op-tosa-transpose-benchmark.html delete mode 100644 test_result/deeplearning/dl-op-linalg-reduceaddf-benchmark.json delete mode 100644 test_result/deeplearning/dl-op-linalg-reducemaxf-benchmark.json diff --git a/scripts/logs2html.py b/scripts/logs2html.py index 3123d0ff..2ea37b24 100755 --- a/scripts/logs2html.py +++ b/scripts/logs2html.py @@ -38,6 +38,7 @@ def gbench_json_to_table(js_path: pathlib.Path) -> str: # --------------------------------------------------------------------------- for js in src.rglob("*.json"): + print("→ parsing", js) log = js.with_suffix(".log") # same stem, optional rel = js.relative_to(src) page = dst / rel.with_suffix(".html") diff --git a/site/deeplearning/dl-layer-ffn-benchmark.html b/site/deeplearning/dl-layer-ffn-benchmark.html new file mode 100644 index 00000000..df2fafab --- /dev/null +++ b/site/deeplearning/dl-layer-ffn-benchmark.html @@ -0,0 +1,36 @@ + + + +

        deeplearning/dl-layer-ffn-benchmark.json

        2025-05-26 21:37:50 UTC

        +

        dl-layer-ffn-benchmark.json

        + + +
        NameTime (ns)CPU (ns)Iterations
        DL_LAYER_FFN/Scalar0.10.110758
        DL_LAYER_FFN/Auto_Vectorization0.00.025878
        +
        Console output +
        2025-05-26T21:12:34+00:00
        +Running ./dl-layer-ffn-benchmark
        +Run on (24 X 5100 MHz CPU s)
        +CPU Caches:
        +  L1 Data 48 KiB (x12)
        +  L1 Instruction 32 KiB (x12)
        +  L2 Unified 1280 KiB (x12)
        +  L3 Unified 30720 KiB (x1)
        +Load Average: 1.00, 1.20, 1.96
        +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        +--------------------------------------------------------------------------
        +Benchmark                                Time             CPU   Iterations
        +--------------------------------------------------------------------------
        +DL_LAYER_FFN/Scalar                  0.065 ms        0.065 ms        10758
        +DL_LAYER_FFN/Auto_Vectorization      0.027 ms        0.027 ms        25878
        +-----------------------------------------------------------
        +Correctness Verification: PASS
        +-----------------------------------------------------------
        +
        \ No newline at end of file diff --git a/site/deeplearning/dl-layer-rmsnorm-benchmark.html b/site/deeplearning/dl-layer-rmsnorm-benchmark.html new file mode 100644 index 00000000..9be2d2c9 --- /dev/null +++ b/site/deeplearning/dl-layer-rmsnorm-benchmark.html @@ -0,0 +1,36 @@ + + + +

        deeplearning/dl-layer-rmsnorm-benchmark.json

        2025-05-26 21:37:50 UTC

        +

        dl-layer-rmsnorm-benchmark.json

        + + +
        NameTime (ns)CPU (ns)Iterations
        DL_LAYER_RMSNORM/Scalar0.00.0358748
        DL_LAYER_RMSNORM/Auto_Vectorization0.00.0753724
        +
        Console output +
        2025-05-26T21:12:38+00:00
        +Running ./dl-layer-rmsnorm-benchmark
        +Run on (24 X 5100 MHz CPU s)
        +CPU Caches:
        +  L1 Data 48 KiB (x12)
        +  L1 Instruction 32 KiB (x12)
        +  L2 Unified 1280 KiB (x12)
        +  L3 Unified 30720 KiB (x1)
        +Load Average: 1.00, 1.19, 1.95
        +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        +------------------------------------------------------------------------------
        +Benchmark                                    Time             CPU   Iterations
        +------------------------------------------------------------------------------
        +DL_LAYER_RMSNORM/Scalar                  0.002 ms        0.002 ms       358748
        +DL_LAYER_RMSNORM/Auto_Vectorization      0.001 ms        0.001 ms       753724
        +-----------------------------------------------------------
        +Correctness Verification: PASS
        +-----------------------------------------------------------
        +
        \ No newline at end of file diff --git a/site/deeplearning/dl-layer-selfattention-benchmark.html b/site/deeplearning/dl-layer-selfattention-benchmark.html new file mode 100644 index 00000000..016a28a1 --- /dev/null +++ b/site/deeplearning/dl-layer-selfattention-benchmark.html @@ -0,0 +1,36 @@ + + + +

        deeplearning/dl-layer-selfattention-benchmark.json

        2025-05-26 21:37:50 UTC

        +

        dl-layer-selfattention-benchmark.json

        + + +
        NameTime (ns)CPU (ns)Iterations
        DL_LAYER_ATTENTION/Scalar4.74.7148
        DL_LAYER_ATTENTION/Auto_Vectorization1.61.6447
        +
        Console output +
        2025-05-26T21:12:36+00:00
        +Running ./dl-layer-selfattention-benchmark
        +Run on (24 X 5100 MHz CPU s)
        +CPU Caches:
        +  L1 Data 48 KiB (x12)
        +  L1 Instruction 32 KiB (x12)
        +  L2 Unified 1280 KiB (x12)
        +  L3 Unified 30720 KiB (x1)
        +Load Average: 1.00, 1.19, 1.95
        +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        +--------------------------------------------------------------------------------
        +Benchmark                                      Time             CPU   Iterations
        +--------------------------------------------------------------------------------
        +DL_LAYER_ATTENTION/Scalar                   4.71 ms         4.71 ms          148
        +DL_LAYER_ATTENTION/Auto_Vectorization       1.57 ms         1.57 ms          447
        +-----------------------------------------------------------
        +Correctness Verification: PASS
        +-----------------------------------------------------------
        +
        \ No newline at end of file diff --git a/site/deeplearning/dl-model-lenet-benchmark.html b/site/deeplearning/dl-model-lenet-benchmark.html new file mode 100644 index 00000000..09b968ee --- /dev/null +++ b/site/deeplearning/dl-model-lenet-benchmark.html @@ -0,0 +1,37 @@ + + + +

        deeplearning/dl-model-lenet-benchmark.json

        2025-05-26 21:37:50 UTC

        +

        dl-model-lenet-benchmark.json

        + + +
        NameTime (ns)CPU (ns)Iterations
        DL_MODEL_LENET/Auto_Vectorization0.20.24396
        DL_MODEL_LENET/Buddy_Vectorization0.10.15074
        +
        Console output +
        2025-05-26T21:08:36+00:00
        +Running ./dl-model-lenet-benchmark
        +Run on (24 X 5100 MHz CPU s)
        +CPU Caches:
        +  L1 Data 48 KiB (x12)
        +  L1 Instruction 32 KiB (x12)
        +  L2 Unified 1280 KiB (x12)
        +  L3 Unified 30720 KiB (x1)
        +Load Average: 1.37, 1.45, 2.25
        +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        +-----------------------------------------------------------------------------
        +Benchmark                                   Time             CPU   Iterations
        +-----------------------------------------------------------------------------
        +DL_MODEL_LENET/Auto_Vectorization       0.155 ms        0.155 ms         4396
        +DL_MODEL_LENET/Buddy_Vectorization      0.137 ms        0.137 ms         5074
        +-----------------------------------------------------------
        +Correctness Verification:
        +Transform case: PASS
        +-----------------------------------------------------------
        +
        \ No newline at end of file diff --git a/site/deeplearning/dl-model-mobilenetv3-benchmark.html b/site/deeplearning/dl-model-mobilenetv3-benchmark.html new file mode 100644 index 00000000..7d1544d6 --- /dev/null +++ b/site/deeplearning/dl-model-mobilenetv3-benchmark.html @@ -0,0 +1,37 @@ + + + +

        deeplearning/dl-model-mobilenetv3-benchmark.json

        2025-05-26 21:37:50 UTC

        +

        dl-model-mobilenetv3-benchmark.json

        + + +
        NameTime (ns)CPU (ns)Iterations
        BM_MobileNet_V3/BM_MobileNet_V3_scalar35.335.320
        BM_MobileNet_V3/BM_MobileNet_V3_conv_opt32.232.221
        +
        Console output +
        2025-05-26T21:08:34+00:00
        +Running ./dl-model-mobilenetv3-benchmark
        +Run on (24 X 5100 MHz CPU s)
        +CPU Caches:
        +  L1 Data 48 KiB (x12)
        +  L1 Instruction 32 KiB (x12)
        +  L2 Unified 1280 KiB (x12)
        +  L3 Unified 30720 KiB (x1)
        +Load Average: 1.40, 1.46, 2.26
        +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        +-----------------------------------------------------------------------------------
        +Benchmark                                         Time             CPU   Iterations
        +-----------------------------------------------------------------------------------
        +BM_MobileNet_V3/BM_MobileNet_V3_scalar         35.3 ms         35.3 ms           20
        +BM_MobileNet_V3/BM_MobileNet_V3_conv_opt       32.2 ms         32.2 ms           21
        +-----------------------------------------------------------
        +Correctness Verification:
        +Transform case: PASS
        +-----------------------------------------------------------
        +
        \ No newline at end of file diff --git a/site/deeplearning/dl-model-resnet18-benchmark.html b/site/deeplearning/dl-model-resnet18-benchmark.html new file mode 100644 index 00000000..bdef866b --- /dev/null +++ b/site/deeplearning/dl-model-resnet18-benchmark.html @@ -0,0 +1,36 @@ + + + +

        deeplearning/dl-model-resnet18-benchmark.json

        2025-05-26 21:37:50 UTC

        +

        dl-model-resnet18-benchmark.json

        + + +
        NameTime (ns)CPU (ns)Iterations
        DL_MODEL_Resnet18/Auto_Vectorization725.8717.91
        DL_MODEL_Resnet18/Buddy_Vectorization723.3723.31
        +
        Console output +
        2025-05-26T21:12:31+00:00
        +Running ./dl-model-resnet18-benchmark
        +Run on (24 X 5100 MHz CPU s)
        +CPU Caches:
        +  L1 Data 48 KiB (x12)
        +  L1 Instruction 32 KiB (x12)
        +  L2 Unified 1280 KiB (x12)
        +  L3 Unified 30720 KiB (x1)
        +Load Average: 1.00, 1.20, 1.96
        +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        +--------------------------------------------------------------------------------
        +Benchmark                                      Time             CPU   Iterations
        +--------------------------------------------------------------------------------
        +DL_MODEL_Resnet18/Auto_Vectorization         726 ms          718 ms            1
        +DL_MODEL_Resnet18/Buddy_Vectorization        723 ms          723 ms            1
        +-----------------------------------------------------------
        +Correctness Verification: PASS
        +-----------------------------------------------------------
        +
        \ No newline at end of file diff --git a/site/deeplearning/dl-model-tinyllama-benchmark.html b/site/deeplearning/dl-model-tinyllama-benchmark.html new file mode 100644 index 00000000..7313640a --- /dev/null +++ b/site/deeplearning/dl-model-tinyllama-benchmark.html @@ -0,0 +1,38 @@ + + + +

        deeplearning/dl-model-tinyllama-benchmark.json

        2025-05-26 21:37:50 UTC

        +

        dl-model-tinyllama-benchmark.json

        + + + +
        NameTime (ns)CPU (ns)Iterations
        DL_MODEL_TINYLLAMA/scalar145312.6145306.01
        DL_MODEL_TINYLLAMA/matmul_opt9843.29842.81
        DL_MODEL_TINYLLAMA/matmul_opt_omp7800.47157.21
        +
        Console output +
        2025-05-26T21:03:18+00:00
        +Running ./dl-model-tinyllama-benchmark
        +Run on (24 X 5100 MHz CPU s)
        +CPU Caches:
        +  L1 Data 48 KiB (x12)
        +  L1 Instruction 32 KiB (x12)
        +  L2 Unified 1280 KiB (x12)
        +  L3 Unified 30720 KiB (x1)
        +Load Average: 1.60, 2.10, 2.74
        +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        +----------------------------------------------------------------------------
        +Benchmark                                  Time             CPU   Iterations
        +----------------------------------------------------------------------------
        +DL_MODEL_TINYLLAMA/scalar             145313 ms       145306 ms            1
        +DL_MODEL_TINYLLAMA/matmul_opt           9843 ms         9843 ms            1
        +DL_MODEL_TINYLLAMA/matmul_opt_omp       7800 ms         7157 ms            1
        +---------- Verification ----------
        +matmul_opt PASS
        +matmul_opt_omp PASS
        +
        \ No newline at end of file diff --git a/site/deeplearning/dl-model-whisper-benchmark.html b/site/deeplearning/dl-model-whisper-benchmark.html new file mode 100644 index 00000000..f1451e15 --- /dev/null +++ b/site/deeplearning/dl-model-whisper-benchmark.html @@ -0,0 +1,37 @@ + + + +

        deeplearning/dl-model-whisper-benchmark.json

        2025-05-26 21:37:50 UTC

        +

        dl-model-whisper-benchmark.json

        + + +
        NameTime (ns)CPU (ns)Iterations
        DL_MODEL_Whisper/Auto_Vectorization80864.480855.31
        DL_MODEL_Whisper/Buddy_Vectorization35875.835871.51
        +
        Console output +
        2025-05-26T21:08:38+00:00
        +Running ./dl-model-whisper-benchmark
        +Run on (24 X 5100 MHz CPU s)
        +CPU Caches:
        +  L1 Data 48 KiB (x12)
        +  L1 Instruction 32 KiB (x12)
        +  L2 Unified 1280 KiB (x12)
        +  L3 Unified 30720 KiB (x1)
        +Load Average: 1.37, 1.45, 2.25
        +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        +-------------------------------------------------------------------------------
        +Benchmark                                     Time             CPU   Iterations
        +-------------------------------------------------------------------------------
        +DL_MODEL_Whisper/Auto_Vectorization       80864 ms        80855 ms            1
        +DL_MODEL_Whisper/Buddy_Vectorization      35876 ms        35871 ms            1
        +-----------------------------------------------------------
        +Correctness Verification for Output1: PASS
        +Correctness Verification for Output2: FAIL
        +-----------------------------------------------------------
        +
        \ No newline at end of file diff --git a/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html b/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html new file mode 100644 index 00000000..97cfd0db --- /dev/null +++ b/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html @@ -0,0 +1,37 @@ + + + +

        deeplearning/dl-op-linalg-arithaddf-benchmark.json

        2025-05-26 21:37:50 UTC

        +

        dl-op-linalg-arithaddf-benchmark.json

        + + +
        NameTime (ns)CPU (ns)Iterations
        BM_ADDF_SCALAR0.00.023951
        BM_ADDF_AutoVectorization0.00.0174606
        +
        Console output +
        2025-05-26T21:13:08+00:00
        +Running ./dl-op-linalg-arithaddf-benchmark
        +Run on (24 X 5100 MHz CPU s)
        +CPU Caches:
        +  L1 Data 48 KiB (x12)
        +  L1 Instruction 32 KiB (x12)
        +  L2 Unified 1280 KiB (x12)
        +  L3 Unified 30720 KiB (x1)
        +Load Average: 1.00, 1.17, 1.92
        +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        +--------------------------------------------------------------------
        +Benchmark                          Time             CPU   Iterations
        +--------------------------------------------------------------------
        +BM_ADDF_SCALAR                 0.029 ms        0.029 ms        23951
        +BM_ADDF_AutoVectorization      0.004 ms        0.004 ms       174606
        +-----------------------------------------------------------
        +Correctness Verification:
        +Transform case: PASS
        +-----------------------------------------------------------
        +
        \ No newline at end of file diff --git a/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html b/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html new file mode 100644 index 00000000..8d6cd465 --- /dev/null +++ b/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html @@ -0,0 +1,37 @@ + + + +

        deeplearning/dl-op-linalg-arithdivf-benchmark.json

        2025-05-26 21:37:50 UTC

        +

        dl-op-linalg-arithdivf-benchmark.json

        + + +
        NameTime (ns)CPU (ns)Iterations
        BM_DIVF_SCALAR0.00.022508
        BM_DIVF_AutoVectorization0.00.073818
        +
        Console output +
        2025-05-26T21:13:11+00:00
        +Running ./dl-op-linalg-arithdivf-benchmark
        +Run on (24 X 5100 MHz CPU s)
        +CPU Caches:
        +  L1 Data 48 KiB (x12)
        +  L1 Instruction 32 KiB (x12)
        +  L2 Unified 1280 KiB (x12)
        +  L3 Unified 30720 KiB (x1)
        +Load Average: 1.00, 1.17, 1.92
        +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        +--------------------------------------------------------------------
        +Benchmark                          Time             CPU   Iterations
        +--------------------------------------------------------------------
        +BM_DIVF_SCALAR                 0.030 ms        0.030 ms        22508
        +BM_DIVF_AutoVectorization      0.010 ms        0.010 ms        73818
        +-----------------------------------------------------------
        +Correctness Verification:
        +Transform case: PASS
        +-----------------------------------------------------------
        +
        \ No newline at end of file diff --git a/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html b/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html new file mode 100644 index 00000000..6ffe9554 --- /dev/null +++ b/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html @@ -0,0 +1,37 @@ + + + +

        deeplearning/dl-op-linalg-arithmulf-benchmark.json

        2025-05-26 21:37:50 UTC

        +

        dl-op-linalg-arithmulf-benchmark.json

        + + +
        NameTime (ns)CPU (ns)Iterations
        BM_MULF_SCALAR0.00.023392
        BM_MULF_AutoVectorization0.00.0175155
        +
        Console output +
        2025-05-26T21:13:12+00:00
        +Running ./dl-op-linalg-arithmulf-benchmark
        +Run on (24 X 5100 MHz CPU s)
        +CPU Caches:
        +  L1 Data 48 KiB (x12)
        +  L1 Instruction 32 KiB (x12)
        +  L2 Unified 1280 KiB (x12)
        +  L3 Unified 30720 KiB (x1)
        +Load Average: 1.00, 1.17, 1.92
        +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        +--------------------------------------------------------------------
        +Benchmark                          Time             CPU   Iterations
        +--------------------------------------------------------------------
        +BM_MULF_SCALAR                 0.029 ms        0.029 ms        23392
        +BM_MULF_AutoVectorization      0.004 ms        0.004 ms       175155
        +-----------------------------------------------------------
        +Correctness Verification:
        +Transform case: PASS
        +-----------------------------------------------------------
        +
        \ No newline at end of file diff --git a/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html b/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html new file mode 100644 index 00000000..afb5b573 --- /dev/null +++ b/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html @@ -0,0 +1,37 @@ + + + +

        deeplearning/dl-op-linalg-arithnegf-benchmark.json

        2025-05-26 21:37:50 UTC

        +

        dl-op-linalg-arithnegf-benchmark.json

        + + +
        NameTime (ns)CPU (ns)Iterations
        BM_NEGF_SCALAR0.00.030765
        BM_NEGF_AutoVectorization0.00.0290149
        +
        Console output +
        2025-05-26T21:13:15+00:00
        +Running ./dl-op-linalg-arithnegf-benchmark
        +Run on (24 X 5100 MHz CPU s)
        +CPU Caches:
        +  L1 Data 48 KiB (x12)
        +  L1 Instruction 32 KiB (x12)
        +  L2 Unified 1280 KiB (x12)
        +  L3 Unified 30720 KiB (x1)
        +Load Average: 1.00, 1.17, 1.92
        +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        +--------------------------------------------------------------------
        +Benchmark                          Time             CPU   Iterations
        +--------------------------------------------------------------------
        +BM_NEGF_SCALAR                 0.023 ms        0.023 ms        30765
        +BM_NEGF_AutoVectorization      0.002 ms        0.002 ms       290149
        +-----------------------------------------------------------
        +Correctness Verification:
        +Transform case: PASS
        +-----------------------------------------------------------
        +
        \ No newline at end of file diff --git a/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html b/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html new file mode 100644 index 00000000..061bfc84 --- /dev/null +++ b/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html @@ -0,0 +1,37 @@ + + + +

        deeplearning/dl-op-linalg-arithsubf-benchmark.json

        2025-05-26 21:37:50 UTC

        +

        dl-op-linalg-arithsubf-benchmark.json

        + + +
        NameTime (ns)CPU (ns)Iterations
        BM_SUBF_SCALAR0.00.023979
        BM_SUBF_AutoVectorization0.00.0175235
        +
        Console output +
        2025-05-26T21:13:17+00:00
        +Running ./dl-op-linalg-arithsubf-benchmark
        +Run on (24 X 5100 MHz CPU s)
        +CPU Caches:
        +  L1 Data 48 KiB (x12)
        +  L1 Instruction 32 KiB (x12)
        +  L2 Unified 1280 KiB (x12)
        +  L3 Unified 30720 KiB (x1)
        +Load Average: 1.00, 1.17, 1.91
        +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        +--------------------------------------------------------------------
        +Benchmark                          Time             CPU   Iterations
        +--------------------------------------------------------------------
        +BM_SUBF_SCALAR                 0.030 ms        0.030 ms        23979
        +BM_SUBF_AutoVectorization      0.004 ms        0.004 ms       175235
        +-----------------------------------------------------------
        +Correctness Verification:
        +Transform case: PASS
        +-----------------------------------------------------------
        +
        \ No newline at end of file diff --git a/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html b/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html new file mode 100644 index 00000000..0813e6c2 --- /dev/null +++ b/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html @@ -0,0 +1,48 @@ + + + +

        deeplearning/dl-op-linalg-batch-matmul-benchmark.json

        2025-05-26 21:37:50 UTC

        +

        dl-op-linalg-batch-matmul-benchmark.json

        + + + + + + + +
        NameTime (ns)CPU (ns)Iterations
        DL_OPS_BATCH_MATMUL/Scalar/iterations:13551.43551.01
        DL_OPS_BATCH_MATMUL/AutoVectorization/iterations:11002.21002.11
        DL_OPS_BATCH_MATMUL/Vectorization/iterations:1191.7191.71
        DL_OPS_BATCH_MATMUL/Tile/iterations:1109.6109.61
        DL_OPS_BATCH_MATMUL/SCF/iterations:1117.3117.31
        DL_OPS_BATCH_MATMUL/BROADCAST/iterations:1356.3356.21
        DL_OPS_BATCH_MATMUL/BROADCAST_OMP/iterations:179.131.51
        +
        Console output +
        2025-05-26T21:12:58+00:00
        +Running ./dl-op-linalg-batch-matmul-benchmark
        +Run on (24 X 5100 MHz CPU s)
        +CPU Caches:
        +  L1 Data 48 KiB (x12)
        +  L1 Instruction 32 KiB (x12)
        +  L2 Unified 1280 KiB (x12)
        +  L3 Unified 30720 KiB (x1)
        +Load Average: 1.00, 1.18, 1.93
        +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        +---------------------------------------------------------------------------------------------
        +Benchmark                                                   Time             CPU   Iterations
        +---------------------------------------------------------------------------------------------
        +DL_OPS_BATCH_MATMUL/Scalar/iterations:1                  3551 ms         3551 ms            1
        +DL_OPS_BATCH_MATMUL/AutoVectorization/iterations:1       1002 ms         1002 ms            1
        +DL_OPS_BATCH_MATMUL/Vectorization/iterations:1            192 ms          192 ms            1
        +DL_OPS_BATCH_MATMUL/Tile/iterations:1                     110 ms          110 ms            1
        +DL_OPS_BATCH_MATMUL/SCF/iterations:1                      117 ms          117 ms            1
        +DL_OPS_BATCH_MATMUL/BROADCAST/iterations:1                356 ms          356 ms            1
        +DL_OPS_BATCH_MATMUL/BROADCAST_OMP/iterations:1           79.1 ms         31.5 ms            1
        +---------- Verification ----------
        +Tile PASS
        +SCF PASS
        +BROADCAST PASS
        +BROADCAST_OMP PASS
        +
        \ No newline at end of file diff --git a/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html b/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html new file mode 100644 index 00000000..4b1930c2 --- /dev/null +++ b/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html @@ -0,0 +1,37 @@ + + + +

        deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.json

        2025-05-26 21:37:50 UTC

        +

        dl-op-linalg-conv2d-nchw-fchw-benchmark.json

        + + +
        NameTime (ns)CPU (ns)Iterations
        BM_Conv2DNchwFchw_SCALAR282.0282.02
        BM_Conv2DNchwFchw_Im2col11.811.862
        +
        Console output +
        2025-05-26T21:12:52+00:00
        +Running ./dl-op-linalg-conv2d-nchw-fchw-benchmark
        +Run on (24 X 5100 MHz CPU s)
        +CPU Caches:
        +  L1 Data 48 KiB (x12)
        +  L1 Instruction 32 KiB (x12)
        +  L2 Unified 1280 KiB (x12)
        +  L3 Unified 30720 KiB (x1)
        +Load Average: 1.00, 1.18, 1.94
        +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        +-------------------------------------------------------------------
        +Benchmark                         Time             CPU   Iterations
        +-------------------------------------------------------------------
        +BM_Conv2DNchwFchw_SCALAR        282 ms          282 ms            2
        +BM_Conv2DNchwFchw_Im2col       11.8 ms         11.8 ms           62
        +-----------------------------------------------------------
        +Correctness Verification:
        +Transform case: PASS
        +-----------------------------------------------------------
        +
        \ No newline at end of file diff --git a/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html b/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html new file mode 100644 index 00000000..3ec8399e --- /dev/null +++ b/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html @@ -0,0 +1,41 @@ + + + +

        deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json

        2025-05-26 21:37:50 UTC

        +

        dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json

        + + + + +
        NameTime (ns)CPU (ns)Iterations
        DL_OPS_CONV_2D_NHWC_FHWC/scalar/iterations:573.773.65
        DL_OPS_CONV_2D_NHWC_FHWC/auto_vectorization/iterations:59.39.35
        DL_OPS_CONV_2D_NHWC_FHWC/vectorization/iterations:51.71.75
        DL_OPS_CONV_2D_NHWC_FHWC/vec_tile/iterations:51.71.75
        +
        Console output +
        2025-05-26T21:12:56+00:00
        +Running ./dl-op-linalg-conv2d-nhwc-fhwc-benchmark
        +Run on (24 X 5100 MHz CPU s)
        +CPU Caches:
        +  L1 Data 48 KiB (x12)
        +  L1 Instruction 32 KiB (x12)
        +  L2 Unified 1280 KiB (x12)
        +  L3 Unified 30720 KiB (x1)
        +Load Average: 1.00, 1.18, 1.93
        +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        +---------------------------------------------------------------------------------------------------
        +Benchmark                                                         Time             CPU   Iterations
        +---------------------------------------------------------------------------------------------------
        +DL_OPS_CONV_2D_NHWC_FHWC/scalar/iterations:5                   73.7 ms         73.6 ms            5
        +DL_OPS_CONV_2D_NHWC_FHWC/auto_vectorization/iterations:5       9.34 ms         9.34 ms            5
        +DL_OPS_CONV_2D_NHWC_FHWC/vectorization/iterations:5            1.74 ms         1.74 ms            5
        +DL_OPS_CONV_2D_NHWC_FHWC/vec_tile/iterations:5                 1.73 ms         1.73 ms            5
        +---------- Verification ----------
        +auto_vectorization PASS
        +vectorization PASS
        +vec_tile PASS
        +
        \ No newline at end of file diff --git a/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html b/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html new file mode 100644 index 00000000..11ea1d0e --- /dev/null +++ b/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html @@ -0,0 +1,37 @@ + + + +

        deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json

        2025-05-26 21:37:50 UTC

        +

        dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json

        + + +
        NameTime (ns)CPU (ns)Iterations
        BM_CONV_2D_NHWC_HWCF_SCALAR32.632.622
        BM_CONV_2D_NHWC_HWCF_AutoVectorization6.16.1115
        +
        Console output +
        2025-05-26T21:12:54+00:00
        +Running ./dl-op-linalg-conv2d-nhwc-hwcf-benchmark
        +Run on (24 X 5100 MHz CPU s)
        +CPU Caches:
        +  L1 Data 48 KiB (x12)
        +  L1 Instruction 32 KiB (x12)
        +  L2 Unified 1280 KiB (x12)
        +  L3 Unified 30720 KiB (x1)
        +Load Average: 1.00, 1.18, 1.94
        +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        +---------------------------------------------------------------------------------
        +Benchmark                                       Time             CPU   Iterations
        +---------------------------------------------------------------------------------
        +BM_CONV_2D_NHWC_HWCF_SCALAR                  32.6 ms         32.6 ms           22
        +BM_CONV_2D_NHWC_HWCF_AutoVectorization       6.07 ms         6.07 ms          115
        +-----------------------------------------------------------
        +Correctness Verification:
        +Transform case: PASS
        +-----------------------------------------------------------
        +
        \ No newline at end of file diff --git a/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html b/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html new file mode 100644 index 00000000..beda2924 --- /dev/null +++ b/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html @@ -0,0 +1,38 @@ + + + +

        deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json

        2025-05-26 21:37:50 UTC

        +

        dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json

        + + + +
        NameTime (ns)CPU (ns)Iterations
        DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/scalar/iterations:54.84.85
        DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/auto_vectorization/iterations:51.71.75
        DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/vectorization/iterations:50.10.15
        +
        Console output +
        2025-05-26T21:12:56+00:00
        +Running ./dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark
        +Run on (24 X 5100 MHz CPU s)
        +CPU Caches:
        +  L1 Data 48 KiB (x12)
        +  L1 Instruction 32 KiB (x12)
        +  L2 Unified 1280 KiB (x12)
        +  L3 Unified 30720 KiB (x1)
        +Load Average: 1.00, 1.18, 1.93
        +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        +------------------------------------------------------------------------------------------------------------
        +Benchmark                                                                  Time             CPU   Iterations
        +------------------------------------------------------------------------------------------------------------
        +DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/scalar/iterations:5                   4.84 ms         4.84 ms            5
        +DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/auto_vectorization/iterations:5       1.68 ms         1.68 ms            5
        +DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/vectorization/iterations:5           0.120 ms        0.120 ms            5
        +---------- Verification ----------
        +auto_vectorization PASS
        +vectorization PASS
        +
        \ No newline at end of file diff --git a/site/deeplearning/dl-op-linalg-mathexp-benchmark.html b/site/deeplearning/dl-op-linalg-mathexp-benchmark.html new file mode 100644 index 00000000..5eb2b117 --- /dev/null +++ b/site/deeplearning/dl-op-linalg-mathexp-benchmark.html @@ -0,0 +1,37 @@ + + + +

        deeplearning/dl-op-linalg-mathexp-benchmark.json

        2025-05-26 21:37:50 UTC

        +

        dl-op-linalg-mathexp-benchmark.json

        + + +
        NameTime (ns)CPU (ns)Iterations
        BM_EXP_SCALAR0.00.015289
        BM_EXP_AutoVectorization0.00.022195
        +
        Console output +
        2025-05-26T21:13:22+00:00
        +Running ./dl-op-linalg-mathexp-benchmark
        +Run on (24 X 5100 MHz CPU s)
        +CPU Caches:
        +  L1 Data 48 KiB (x12)
        +  L1 Instruction 32 KiB (x12)
        +  L2 Unified 1280 KiB (x12)
        +  L3 Unified 30720 KiB (x1)
        +Load Average: 1.08, 1.18, 1.91
        +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        +-------------------------------------------------------------------
        +Benchmark                         Time             CPU   Iterations
        +-------------------------------------------------------------------
        +BM_EXP_SCALAR                 0.046 ms        0.046 ms        15289
        +BM_EXP_AutoVectorization      0.032 ms        0.032 ms        22195
        +-----------------------------------------------------------
        +Correctness Verification:
        +Transform case: PASS
        +-----------------------------------------------------------
        +
        \ No newline at end of file diff --git a/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html b/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html new file mode 100644 index 00000000..cadf7bdf --- /dev/null +++ b/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html @@ -0,0 +1,37 @@ + + + +

        deeplearning/dl-op-linalg-mathfpow-benchmark.json

        2025-05-26 21:37:50 UTC

        +

        dl-op-linalg-mathfpow-benchmark.json

        + + +
        NameTime (ns)CPU (ns)Iterations
        BM_FPOW_SCALAR0.10.18096
        BM_FPOW_AutoVectorization0.10.112303
        +
        Console output +
        2025-05-26T21:13:19+00:00
        +Running ./dl-op-linalg-mathfpow-benchmark
        +Run on (24 X 5100 MHz CPU s)
        +CPU Caches:
        +  L1 Data 48 KiB (x12)
        +  L1 Instruction 32 KiB (x12)
        +  L2 Unified 1280 KiB (x12)
        +  L3 Unified 30720 KiB (x1)
        +Load Average: 1.00, 1.17, 1.91
        +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        +--------------------------------------------------------------------
        +Benchmark                          Time             CPU   Iterations
        +--------------------------------------------------------------------
        +BM_FPOW_SCALAR                 0.083 ms        0.083 ms         8096
        +BM_FPOW_AutoVectorization      0.057 ms        0.057 ms        12303
        +-----------------------------------------------------------
        +Correctness Verification:
        +Transform case: PASS
        +-----------------------------------------------------------
        +
        \ No newline at end of file diff --git a/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html b/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html new file mode 100644 index 00000000..cd3f4c39 --- /dev/null +++ b/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html @@ -0,0 +1,37 @@ + + + +

        deeplearning/dl-op-linalg-mathrsqrt-benchmark.json

        2025-05-26 21:37:50 UTC

        +

        dl-op-linalg-mathrsqrt-benchmark.json

        + + +
        NameTime (ns)CPU (ns)Iterations
        BM_RSQRT_SCALAR0.10.19407
        BM_RSQRT_AutoVectorization0.00.0161010
        +
        Console output +
        2025-05-26T21:13:20+00:00
        +Running ./dl-op-linalg-mathrsqrt-benchmark
        +Run on (24 X 5100 MHz CPU s)
        +CPU Caches:
        +  L1 Data 48 KiB (x12)
        +  L1 Instruction 32 KiB (x12)
        +  L2 Unified 1280 KiB (x12)
        +  L3 Unified 30720 KiB (x1)
        +Load Average: 1.08, 1.18, 1.91
        +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        +---------------------------------------------------------------------
        +Benchmark                           Time             CPU   Iterations
        +---------------------------------------------------------------------
        +BM_RSQRT_SCALAR                 0.073 ms        0.073 ms         9407
        +BM_RSQRT_AutoVectorization      0.004 ms        0.004 ms       161010
        +-----------------------------------------------------------
        +Correctness Verification:
        +Transform case: PASS
        +-----------------------------------------------------------
        +
        \ No newline at end of file diff --git a/site/deeplearning/dl-op-linalg-matmul-benchmark.html b/site/deeplearning/dl-op-linalg-matmul-benchmark.html new file mode 100644 index 00000000..9cc4a3dd --- /dev/null +++ b/site/deeplearning/dl-op-linalg-matmul-benchmark.html @@ -0,0 +1,43 @@ + + + +

        deeplearning/dl-op-linalg-matmul-benchmark.json

        2025-05-26 21:37:50 UTC

        +

        dl-op-linalg-matmul-benchmark.json

        + + + + + +
        NameTime (ns)CPU (ns)Iterations
        DL_OPS_MATMUL/scalar_O0/iterations:14219.84219.71
        DL_OPS_MATMUL/scalar_O3/iterations:13393.83393.61
        DL_OPS_MATMUL/tile/iterations:1117.1117.11
        DL_OPS_MATMUL/vec/iterations:159.959.91
        DL_OPS_MATMUL/vec_omp/iterations:130.59.81
        +
        Console output +
        2025-05-26T21:12:40+00:00
        +Running ./dl-op-linalg-matmul-benchmark
        +Run on (24 X 5100 MHz CPU s)
        +CPU Caches:
        +  L1 Data 48 KiB (x12)
        +  L1 Instruction 32 KiB (x12)
        +  L2 Unified 1280 KiB (x12)
        +  L3 Unified 30720 KiB (x1)
        +Load Average: 1.00, 1.19, 1.95
        +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        +-------------------------------------------------------------------------------
        +Benchmark                                     Time             CPU   Iterations
        +-------------------------------------------------------------------------------
        +DL_OPS_MATMUL/scalar_O0/iterations:1       4220 ms         4220 ms            1
        +DL_OPS_MATMUL/scalar_O3/iterations:1       3394 ms         3394 ms            1
        +DL_OPS_MATMUL/tile/iterations:1             117 ms          117 ms            1
        +DL_OPS_MATMUL/vec/iterations:1             59.9 ms         59.9 ms            1
        +DL_OPS_MATMUL/vec_omp/iterations:1         30.5 ms         9.79 ms            1
        +---------- Verification ----------
        +tile PASS
        +vec PASS
        +vec_omp PASS
        +
        \ No newline at end of file diff --git a/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html b/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html new file mode 100644 index 00000000..1dbda987 --- /dev/null +++ b/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html @@ -0,0 +1,37 @@ + + + +

        deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.json

        2025-05-26 21:37:50 UTC

        +

        dl-op-linalg-pooling-nhwc-sum-benchmark.json

        + + +
        NameTime (ns)CPU (ns)Iterations
        BM_POOLING_NHWC_SUM_SCALAR0.20.22972
        BM_POOLING_NHWC_SUM_AutoVectorization0.00.016865
        +
        Console output +
        2025-05-26T21:12:56+00:00
        +Running ./dl-op-linalg-pooling-nhwc-sum-benchmark
        +Run on (24 X 5100 MHz CPU s)
        +CPU Caches:
        +  L1 Data 48 KiB (x12)
        +  L1 Instruction 32 KiB (x12)
        +  L2 Unified 1280 KiB (x12)
        +  L3 Unified 30720 KiB (x1)
        +Load Average: 1.00, 1.18, 1.93
        +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        +--------------------------------------------------------------------------------
        +Benchmark                                      Time             CPU   Iterations
        +--------------------------------------------------------------------------------
        +BM_POOLING_NHWC_SUM_SCALAR                 0.234 ms        0.234 ms         2972
        +BM_POOLING_NHWC_SUM_AutoVectorization      0.041 ms        0.041 ms        16865
        +-----------------------------------------------------------
        +Correctness Verification:
        +Transform case: PASS
        +-----------------------------------------------------------
        +
        \ No newline at end of file diff --git a/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html b/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html new file mode 100644 index 00000000..1e310950 --- /dev/null +++ b/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html @@ -0,0 +1,37 @@ + + + +

        deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.json

        2025-05-26 21:37:50 UTC

        +

        dl-op-linalg-softmax-exp-sum-div-benchmark.json

        + + +
        NameTime (ns)CPU (ns)Iterations
        BM_SOFTMAXEXPSUMDIV_SCALAR0.00.0125501
        BM_SOFTMAXEXPSUMDIV_AutoVectorization0.00.0181845
        +
        Console output +
        2025-05-26T21:13:25+00:00
        +Running ./dl-op-linalg-softmax-exp-sum-div-benchmark
        +Run on (24 X 5100 MHz CPU s)
        +CPU Caches:
        +  L1 Data 48 KiB (x12)
        +  L1 Instruction 32 KiB (x12)
        +  L2 Unified 1280 KiB (x12)
        +  L3 Unified 30720 KiB (x1)
        +Load Average: 1.08, 1.18, 1.91
        +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        +--------------------------------------------------------------------------------
        +Benchmark                                      Time             CPU   Iterations
        +--------------------------------------------------------------------------------
        +BM_SOFTMAXEXPSUMDIV_SCALAR                 0.006 ms        0.006 ms       125501
        +BM_SOFTMAXEXPSUMDIV_AutoVectorization      0.004 ms        0.004 ms       181845
        +-----------------------------------------------------------
        +Correctness Verification:
        +Transform case: PASS
        +-----------------------------------------------------------
        +
        \ No newline at end of file diff --git a/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html b/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html new file mode 100644 index 00000000..8970b567 --- /dev/null +++ b/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html @@ -0,0 +1,41 @@ + + + +

        deeplearning/dl-op-matmul-transpose-b-benchmark.json

        2025-05-26 21:37:50 UTC

        +

        dl-op-matmul-transpose-b-benchmark.json

        + + + + +
        NameTime (ns)CPU (ns)Iterations
        DL_OPS_MATMUL_TRANSPOSE_B/scalar_O0/iterations:51265.31263.65
        DL_OPS_MATMUL_TRANSPOSE_B/scalar_O3/iterations:5326.8326.85
        DL_OPS_MATMUL_TRANSPOSE_B/scalar_O3_omp/iterations:530.318.85
        DL_OPS_MATMUL_TRANSPOSE_B/vec/iterations:583.683.65
        +
        Console output +
        2025-05-26T21:13:28+00:00
        +Running ./dl-op-matmul-transpose-b-benchmark
        +Run on (24 X 5100 MHz CPU s)
        +CPU Caches:
        +  L1 Data 48 KiB (x12)
        +  L1 Instruction 32 KiB (x12)
        +  L2 Unified 1280 KiB (x12)
        +  L3 Unified 30720 KiB (x1)
        +Load Average: 1.07, 1.18, 1.91
        +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        +-----------------------------------------------------------------------------------------------
        +Benchmark                                                     Time             CPU   Iterations
        +-----------------------------------------------------------------------------------------------
        +DL_OPS_MATMUL_TRANSPOSE_B/scalar_O0/iterations:5           1265 ms         1264 ms            5
        +DL_OPS_MATMUL_TRANSPOSE_B/scalar_O3/iterations:5            327 ms          327 ms            5
        +DL_OPS_MATMUL_TRANSPOSE_B/scalar_O3_omp/iterations:5       30.3 ms         18.8 ms            5
        +DL_OPS_MATMUL_TRANSPOSE_B/vec/iterations:5                 83.6 ms         83.6 ms            5
        +---------- Verification ----------
        +scalar_O3 PASS
        +scalar_O3_omp PASS
        +vec PASS
        +
        \ No newline at end of file diff --git a/site/deeplearning/dl-op-tosa-transpose-benchmark.html b/site/deeplearning/dl-op-tosa-transpose-benchmark.html new file mode 100644 index 00000000..fb55008f --- /dev/null +++ b/site/deeplearning/dl-op-tosa-transpose-benchmark.html @@ -0,0 +1,35 @@ + + + +

        deeplearning/dl-op-tosa-transpose-benchmark.json

        2025-05-26 21:37:50 UTC

        +

        dl-op-tosa-transpose-benchmark.json

        + + +
        NameTime (ns)CPU (ns)Iterations
        DL_OPS_TRANSPOSE_2D/scalar_O0/iterations:529.720.95
        DL_OPS_TRANSPOSE_2D/scalar_O3/iterations:521.920.45
        +
        Console output +
        2025-05-26T21:13:27+00:00
        +Running ./dl-op-tosa-transpose-benchmark
        +Run on (24 X 5100 MHz CPU s)
        +CPU Caches:
        +  L1 Data 48 KiB (x12)
        +  L1 Instruction 32 KiB (x12)
        +  L2 Unified 1280 KiB (x12)
        +  L3 Unified 30720 KiB (x1)
        +Load Average: 1.07, 1.18, 1.91
        +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        +-------------------------------------------------------------------------------------
        +Benchmark                                           Time             CPU   Iterations
        +-------------------------------------------------------------------------------------
        +DL_OPS_TRANSPOSE_2D/scalar_O0/iterations:5       29.7 ms         20.9 ms            5
        +DL_OPS_TRANSPOSE_2D/scalar_O3/iterations:5       21.9 ms         20.4 ms            5
        +---------- Verification ----------
        +scalar_O3 PASS
        +
        \ No newline at end of file diff --git a/site/index.html b/site/index.html index 88dbcf5b..53741311 100644 --- a/site/index.html +++ b/site/index.html @@ -9,5 +9,31 @@ summary{font-weight:600;cursor:pointer}

        Buddy-Benchmark results

        \ No newline at end of file diff --git a/site/vectorization/vectorization_matrix.html b/site/vectorization/vectorization_matrix.html index 222a42ff..b9c720be 100644 --- a/site/vectorization/vectorization_matrix.html +++ b/site/vectorization/vectorization_matrix.html @@ -9,8 +9,8 @@ summary{font-weight:600;cursor:pointer} -

        vectorization/vectorization_matrix.json

        2025-05-26 20:28:57 UTC

        +

        vectorization/vectorization_matrix.json

        2025-05-26 21:37:50 UTC

        vectorization_matrix.json

        - -
        NameTime (ns)CPU (ns)Iterations
        MLIR_MatMul/119.119.136693381
        MLIR_MatVec/120.920.933941913
        \ No newline at end of file +MLIR_MatMul/119.619.636524480 +MLIR_MatVec/121.321.332313641 \ No newline at end of file diff --git a/test_result/deeplearning/dl-op-linalg-reduceaddf-benchmark.json b/test_result/deeplearning/dl-op-linalg-reduceaddf-benchmark.json deleted file mode 100644 index b9374b08..00000000 --- a/test_result/deeplearning/dl-op-linalg-reduceaddf-benchmark.json +++ /dev/null @@ -1,38 +0,0 @@ -{ - "context": { - "date": "2025-05-26T21:13:25+00:00", - "host_name": "4ed4bacfe45d", - "executable": "./dl-op-linalg-reduceaddf-benchmark", - "num_cpus": 24, - "mhz_per_cpu": 5100, - "cpu_scaling_enabled": true, - "caches": [ - { - "type": "Data", - "level": 1, - "size": 49152, - "num_sharing": 2 - }, - { - "type": "Instruction", - "level": 1, - "size": 32768, - "num_sharing": 2 - }, - { - "type": "Unified", - "level": 2, - "size": 1310720, - "num_sharing": 2 - }, - { - "type": "Unified", - "level": 3, - "size": 31457280, - "num_sharing": 24 - } - ], - "load_avg": [1.08008,1.18115,1.91162], - "library_build_type": "release" - }, - "benchmarks": [ diff --git a/test_result/deeplearning/dl-op-linalg-reducemaxf-benchmark.json b/test_result/deeplearning/dl-op-linalg-reducemaxf-benchmark.json deleted file mode 100644 index 1df2bbe8..00000000 --- a/test_result/deeplearning/dl-op-linalg-reducemaxf-benchmark.json +++ /dev/null @@ -1,38 +0,0 @@ -{ - "context": { - "date": "2025-05-26T21:13:25+00:00", - "host_name": "4ed4bacfe45d", - "executable": "./dl-op-linalg-reducemaxf-benchmark", - "num_cpus": 24, - "mhz_per_cpu": 5100, - "cpu_scaling_enabled": true, - "caches": [ - { - "type": "Data", - "level": 1, - "size": 49152, - "num_sharing": 2 - }, - { - "type": "Instruction", - "level": 1, - "size": 32768, - "num_sharing": 2 - }, - { - "type": "Unified", - "level": 2, - "size": 1310720, - "num_sharing": 2 - }, - { - "type": "Unified", - "level": 3, - "size": 31457280, - "num_sharing": 24 - } - ], - "load_avg": [1.08008,1.18115,1.91162], - "library_build_type": 
"release" - }, - "benchmarks": [ diff --git a/test_result/imageprocessing/image-processing-result.log b/test_result/imageprocessing/image-processing-result.log index 0ee3637b..1326e727 100644 --- a/test_result/imageprocessing/image-processing-result.log +++ b/test_result/imageprocessing/image-processing-result.log @@ -1,4 +1,4 @@ -Benchmark results - Mon May 26 20:28:28 UTC 2025 +Benchmark results - Mon May 26 21:31:02 UTC 2025 Testing SSE support SSE is supported. Running image-processing-benchmark for SSE diff --git a/test_result/vectorization/vectorization_matrix.json b/test_result/vectorization/vectorization_matrix.json index a8af91a1..994891e0 100644 --- a/test_result/vectorization/vectorization_matrix.json +++ b/test_result/vectorization/vectorization_matrix.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-05-26T20:28:53+00:00", + "date": "2025-05-26T21:31:26+00:00", "host_name": "4ed4bacfe45d", "executable": "./vectorization-matrix-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [2.49609,3.41797,6.28662], + "load_avg": [17.2627,16.4761,8.59082], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 36693381, - "real_time": 1.9134156139629500e+01, - "cpu_time": 1.9131720268568333e+01, + "iterations": 36524480, + "real_time": 1.9617160629438615e+01, + "cpu_time": 1.9616760320749265e+01, "time_unit": "ns" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 33941913, - "real_time": 2.0856437909828337e+01, - "cpu_time": 2.0853729811870071e+01, + "iterations": 32313641, + "real_time": 2.1252938182361376e+01, + "cpu_time": 2.1252621114408001e+01, "time_unit": "ns" } ] diff --git a/test_result/vectorization/vectorization_result.log b/test_result/vectorization/vectorization_result.log index f59d0d3e..f6bdcb35 100755 --- a/test_result/vectorization/vectorization_result.log +++ 
b/test_result/vectorization/vectorization_result.log @@ -1,4 +1,4 @@ -Vectorization Benchmark - Mon May 26 20:28:47 UTC 2025 +Vectorization Benchmark - Mon May 26 21:31:20 UTC 2025 [Info] Starting vectorization-matrix-benchmark build... [Info] Running CMake configuration... -- The CXX compiler identification is GNU 11.4.0 @@ -39,8 +39,8 @@ Vectorization Benchmark - Mon May 26 20:28:47 UTC 2025 [Info] Building vectorization-matrix-benchmark... [1/17] Generating mlir-matmul.o [2/17] Generating mlir-matvec.o -[3/17] Linking CXX static library benchmarks/Vectorization/libMLIRMatVec.a -[4/17] Linking CXX static library benchmarks/Vectorization/libMLIRMatMul.a +[3/17] Linking CXX static library benchmarks/Vectorization/libMLIRMatMul.a +[4/17] Linking CXX static library benchmarks/Vectorization/libMLIRMatVec.a [5/17] Creating directories for 'project_googlebenchmark' [6/17] Performing download step (git clone) for 'project_googlebenchmark' Cloning into 'project_googlebenchmark'... @@ -128,23 +128,23 @@ Call Stack (most recent call first): -- Generating done -- Build files have been written to: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/src/project_googlebenchmark-build [10/17] Performing build step for 'project_googlebenchmark' -[1/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark_name.cc.o -[2/22] Building CXX object src/CMakeFiles/benchmark_main.dir/benchmark_main.cc.o -[3/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark_api_internal.cc.o -[4/22] Building CXX object src/CMakeFiles/benchmark.dir/sleep.cc.o -[5/22] Building CXX object src/CMakeFiles/benchmark.dir/perf_counters.cc.o -[6/22] Building CXX object src/CMakeFiles/benchmark.dir/colorprint.cc.o -[7/22] Building CXX object src/CMakeFiles/benchmark.dir/timers.cc.o -[8/22] Building CXX object src/CMakeFiles/benchmark.dir/counter.cc.o -[9/22] Building CXX object src/CMakeFiles/benchmark.dir/reporter.cc.o -[10/22] Building CXX object 
src/CMakeFiles/benchmark.dir/console_reporter.cc.o +[1/22] Building CXX object src/CMakeFiles/benchmark_main.dir/benchmark_main.cc.o +[2/22] Building CXX object src/CMakeFiles/benchmark.dir/colorprint.cc.o +[3/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark_name.cc.o +[4/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark_api_internal.cc.o +[5/22] Building CXX object src/CMakeFiles/benchmark.dir/sleep.cc.o +[6/22] Building CXX object src/CMakeFiles/benchmark.dir/perf_counters.cc.o +[7/22] Building CXX object src/CMakeFiles/benchmark.dir/counter.cc.o +[8/22] Building CXX object src/CMakeFiles/benchmark.dir/string_util.cc.o +[9/22] Building CXX object src/CMakeFiles/benchmark.dir/timers.cc.o +[10/22] Building CXX object src/CMakeFiles/benchmark.dir/reporter.cc.o [11/22] Building CXX object src/CMakeFiles/benchmark.dir/commandlineflags.cc.o -[12/22] Building CXX object src/CMakeFiles/benchmark.dir/string_util.cc.o +[12/22] Building CXX object src/CMakeFiles/benchmark.dir/console_reporter.cc.o [13/22] Building CXX object src/CMakeFiles/benchmark.dir/csv_reporter.cc.o [14/22] Building CXX object src/CMakeFiles/benchmark.dir/json_reporter.cc.o [15/22] Building CXX object src/CMakeFiles/benchmark.dir/sysinfo.cc.o -[16/22] Building CXX object src/CMakeFiles/benchmark.dir/statistics.cc.o -[17/22] Building CXX object src/CMakeFiles/benchmark.dir/complexity.cc.o +[16/22] Building CXX object src/CMakeFiles/benchmark.dir/complexity.cc.o +[17/22] Building CXX object src/CMakeFiles/benchmark.dir/statistics.cc.o [18/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark_runner.cc.o [19/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark.cc.o [20/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark_register.cc.o @@ -176,11 +176,11 @@ Call Stack (most recent call first): [12/17] No test step for 'project_googlebenchmark' [13/17] Completed 'project_googlebenchmark' [14/17] Building CXX object 
benchmarks/Vectorization/CMakeFiles/vectorization-matrix-benchmark.dir/Main.cpp.o -[15/17] Building CXX object benchmarks/Vectorization/CMakeFiles/vectorization-matrix-benchmark.dir/MLIRMatMulBenchmark.cpp.o -[16/17] Building CXX object benchmarks/Vectorization/CMakeFiles/vectorization-matrix-benchmark.dir/MLIRMatVecBenchmark.cpp.o +[15/17] Building CXX object benchmarks/Vectorization/CMakeFiles/vectorization-matrix-benchmark.dir/MLIRMatVecBenchmark.cpp.o +[16/17] Building CXX object benchmarks/Vectorization/CMakeFiles/vectorization-matrix-benchmark.dir/MLIRMatMulBenchmark.cpp.o [17/17] Linking CXX executable bin/vectorization-matrix-benchmark [Info] Running vectorization-matrix-benchmark... -2025-05-26T20:28:53+00:00 +2025-05-26T21:31:26+00:00 Running ./vectorization-matrix-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -188,13 +188,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 2.50, 3.42, 6.29 +Load Average: 17.26, 16.48, 8.59 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
-------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------- -MLIR_MatMul/1 19.1 ns 19.1 ns 36693381 -MLIR_MatVec/1 20.9 ns 20.9 ns 33941913 +MLIR_MatMul/1 19.6 ns 19.6 ns 36524480 +MLIR_MatVec/1 21.3 ns 21.3 ns 32313641 -------------------------------------------------------- MLIR_MatMul: MLIR MatMul Operation + Nested Loop [ 18 18 18 18 18 18 18 18 18 18 ] From 998297bd73a7d978fb26fc1edf6216ad5cb0a532 Mon Sep 17 00:00:00 2001 From: Q Liu <52538137+LIUQyou@users.noreply.github.com> Date: Tue, 27 May 2025 00:09:03 +0200 Subject: [PATCH 19/52] update web --- benchmarks/ImageProcessing/Main.cpp | 24 +- site/deeplearning/dl-layer-ffn-benchmark.html | 2 +- .../dl-layer-rmsnorm-benchmark.html | 2 +- .../dl-layer-selfattention-benchmark.html | 2 +- .../dl-model-lenet-benchmark.html | 2 +- .../dl-model-mobilenetv3-benchmark.html | 2 +- .../dl-model-resnet18-benchmark.html | 2 +- .../dl-model-tinyllama-benchmark.html | 2 +- .../dl-model-whisper-benchmark.html | 2 +- .../dl-op-linalg-arithaddf-benchmark.html | 2 +- .../dl-op-linalg-arithdivf-benchmark.html | 2 +- .../dl-op-linalg-arithmulf-benchmark.html | 2 +- .../dl-op-linalg-arithnegf-benchmark.html | 2 +- .../dl-op-linalg-arithsubf-benchmark.html | 2 +- .../dl-op-linalg-batch-matmul-benchmark.html | 2 +- ...-op-linalg-conv2d-nchw-fchw-benchmark.html | 2 +- ...-op-linalg-conv2d-nhwc-fhwc-benchmark.html | 2 +- ...-op-linalg-conv2d-nhwc-hwcf-benchmark.html | 2 +- ...-depthwise-conv-2d-nhwc-hwc-benchmark.html | 2 +- .../dl-op-linalg-mathexp-benchmark.html | 2 +- .../dl-op-linalg-mathfpow-benchmark.html | 2 +- .../dl-op-linalg-mathrsqrt-benchmark.html | 2 +- .../dl-op-linalg-matmul-benchmark.html | 2 +- ...-op-linalg-pooling-nhwc-sum-benchmark.html | 2 +- ...-linalg-softmax-exp-sum-div-benchmark.html | 2 +- .../dl-op-matmul-transpose-b-benchmark.html | 2 +- .../dl-op-tosa-transpose-benchmark.html | 2 +- 
site/vectorization/vectorization_matrix.html | 6 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.json | 348 ++++++++++++++++++ ...om3x3KernelAlignInt_REPLICATE_PADDING.json | 164 +++++++++ .../image-processing-result.log | 210 ++--------- .../vectorization/vectorization_matrix.json | 16 +- .../vectorization/vectorization_result.log | 46 +-- 33 files changed, 620 insertions(+), 246 deletions(-) create mode 100644 test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json create mode 100644 test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json diff --git a/benchmarks/ImageProcessing/Main.cpp b/benchmarks/ImageProcessing/Main.cpp index 3f2f2eeb..81b68103 100755 --- a/benchmarks/ImageProcessing/Main.cpp +++ b/benchmarks/ImageProcessing/Main.cpp @@ -80,18 +80,18 @@ void registerBenchmarkOpenCVResize2D(); // Run benchmarks. int main(int argc, char **argv) { - if (argc != 5) { - throw std::invalid_argument( - "Wrong format of command line arguments.\n" - "Correct format is ./image-processing-benchmark \n where " - "image path provides path of the image to be processed, kernel name " - "denotes the name " - "of desired kernel as specified in " - "kernelmorph denotes the kernel to be used for morphological operations" - "include/ImageProcessing/Kernels.h and Boundary options available " - "are CONSTANT_PADDING, REPLICATE_PADDING.\n"); - } + // if (argc != 5) { + // throw std::invalid_argument( + // "Wrong format of command line arguments.\n" + // "Correct format is ./image-processing-benchmark \n where " + // "image path provides path of the image to be processed, kernel name " + // "denotes the name " + // "of desired kernel as specified in " + // "kernelmorph denotes the kernel to be used for morphological operations" + // "include/ImageProcessing/Kernels.h and Boundary options available " + // "are CONSTANT_PADDING, REPLICATE_PADDING.\n"); + // } Img img = dip::imread(argv[1], 
dip::IMGRD_GRAYSCALE); diff --git a/site/deeplearning/dl-layer-ffn-benchmark.html b/site/deeplearning/dl-layer-ffn-benchmark.html index df2fafab..21b83bbd 100644 --- a/site/deeplearning/dl-layer-ffn-benchmark.html +++ b/site/deeplearning/dl-layer-ffn-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-layer-ffn-benchmark.json

        2025-05-26 21:37:50 UTC

        +

        deeplearning/dl-layer-ffn-benchmark.json

        2025-05-26 21:43:11 UTC

        dl-layer-ffn-benchmark.json

        diff --git a/site/deeplearning/dl-layer-rmsnorm-benchmark.html b/site/deeplearning/dl-layer-rmsnorm-benchmark.html index 9be2d2c9..abf61481 100644 --- a/site/deeplearning/dl-layer-rmsnorm-benchmark.html +++ b/site/deeplearning/dl-layer-rmsnorm-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-layer-rmsnorm-benchmark.json

        2025-05-26 21:37:50 UTC

        +

        deeplearning/dl-layer-rmsnorm-benchmark.json

        2025-05-26 21:43:11 UTC

        dl-layer-rmsnorm-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        DL_LAYER_FFN/Scalar0.10.110758
        diff --git a/site/deeplearning/dl-layer-selfattention-benchmark.html b/site/deeplearning/dl-layer-selfattention-benchmark.html index 016a28a1..d5f68fdd 100644 --- a/site/deeplearning/dl-layer-selfattention-benchmark.html +++ b/site/deeplearning/dl-layer-selfattention-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-layer-selfattention-benchmark.json

        2025-05-26 21:37:50 UTC

        +

        deeplearning/dl-layer-selfattention-benchmark.json

        2025-05-26 21:43:11 UTC

        dl-layer-selfattention-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        DL_LAYER_RMSNORM/Scalar0.00.0358748
        diff --git a/site/deeplearning/dl-model-lenet-benchmark.html b/site/deeplearning/dl-model-lenet-benchmark.html index 09b968ee..0bcd0c56 100644 --- a/site/deeplearning/dl-model-lenet-benchmark.html +++ b/site/deeplearning/dl-model-lenet-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-model-lenet-benchmark.json

        2025-05-26 21:37:50 UTC

        +

        deeplearning/dl-model-lenet-benchmark.json

        2025-05-26 21:43:11 UTC

        dl-model-lenet-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        DL_LAYER_ATTENTION/Scalar4.74.7148
        diff --git a/site/deeplearning/dl-model-mobilenetv3-benchmark.html b/site/deeplearning/dl-model-mobilenetv3-benchmark.html index 7d1544d6..ee361415 100644 --- a/site/deeplearning/dl-model-mobilenetv3-benchmark.html +++ b/site/deeplearning/dl-model-mobilenetv3-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-model-mobilenetv3-benchmark.json

        2025-05-26 21:37:50 UTC

        +

        deeplearning/dl-model-mobilenetv3-benchmark.json

        2025-05-26 21:43:11 UTC

        dl-model-mobilenetv3-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        DL_MODEL_LENET/Auto_Vectorization0.20.24396
        diff --git a/site/deeplearning/dl-model-resnet18-benchmark.html b/site/deeplearning/dl-model-resnet18-benchmark.html index bdef866b..eec5096a 100644 --- a/site/deeplearning/dl-model-resnet18-benchmark.html +++ b/site/deeplearning/dl-model-resnet18-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-model-resnet18-benchmark.json

        2025-05-26 21:37:50 UTC

        +

        deeplearning/dl-model-resnet18-benchmark.json

        2025-05-26 21:43:11 UTC

        dl-model-resnet18-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        BM_MobileNet_V3/BM_MobileNet_V3_scalar35.335.320
        diff --git a/site/deeplearning/dl-model-tinyllama-benchmark.html b/site/deeplearning/dl-model-tinyllama-benchmark.html index 7313640a..40c6a975 100644 --- a/site/deeplearning/dl-model-tinyllama-benchmark.html +++ b/site/deeplearning/dl-model-tinyllama-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-model-tinyllama-benchmark.json

        2025-05-26 21:37:50 UTC

        +

        deeplearning/dl-model-tinyllama-benchmark.json

        2025-05-26 21:43:11 UTC

        dl-model-tinyllama-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        DL_MODEL_Resnet18/Auto_Vectorization725.8717.91
        diff --git a/site/deeplearning/dl-model-whisper-benchmark.html b/site/deeplearning/dl-model-whisper-benchmark.html index f1451e15..bfb8798d 100644 --- a/site/deeplearning/dl-model-whisper-benchmark.html +++ b/site/deeplearning/dl-model-whisper-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-model-whisper-benchmark.json

        2025-05-26 21:37:50 UTC

        +

        deeplearning/dl-model-whisper-benchmark.json

        2025-05-26 21:43:11 UTC

        dl-model-whisper-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        DL_MODEL_TINYLLAMA/scalar145312.6145306.01
        diff --git a/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html b/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html index 97cfd0db..54e2693a 100644 --- a/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-op-linalg-arithaddf-benchmark.json

        2025-05-26 21:37:50 UTC

        +

        deeplearning/dl-op-linalg-arithaddf-benchmark.json

        2025-05-26 21:43:11 UTC

        dl-op-linalg-arithaddf-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        DL_MODEL_Whisper/Auto_Vectorization80864.480855.31
        diff --git a/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html b/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html index 8d6cd465..66b5ae92 100644 --- a/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-op-linalg-arithdivf-benchmark.json

        2025-05-26 21:37:50 UTC

        +

        deeplearning/dl-op-linalg-arithdivf-benchmark.json

        2025-05-26 21:43:11 UTC

        dl-op-linalg-arithdivf-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        BM_ADDF_SCALAR0.00.023951
        diff --git a/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html b/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html index 6ffe9554..4277a79e 100644 --- a/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-op-linalg-arithmulf-benchmark.json

        2025-05-26 21:37:50 UTC

        +

        deeplearning/dl-op-linalg-arithmulf-benchmark.json

        2025-05-26 21:43:11 UTC

        dl-op-linalg-arithmulf-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        BM_DIVF_SCALAR0.00.022508
        diff --git a/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html b/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html index afb5b573..e7b8024f 100644 --- a/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-op-linalg-arithnegf-benchmark.json

        2025-05-26 21:37:50 UTC

        +

        deeplearning/dl-op-linalg-arithnegf-benchmark.json

        2025-05-26 21:43:11 UTC

        dl-op-linalg-arithnegf-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        BM_MULF_SCALAR0.00.023392
        diff --git a/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html b/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html index 061bfc84..e4aa1844 100644 --- a/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-op-linalg-arithsubf-benchmark.json

        2025-05-26 21:37:50 UTC

        +

        deeplearning/dl-op-linalg-arithsubf-benchmark.json

        2025-05-26 21:43:11 UTC

        dl-op-linalg-arithsubf-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        BM_NEGF_SCALAR0.00.030765
        diff --git a/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html b/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html index 0813e6c2..e9f3299c 100644 --- a/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html +++ b/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-op-linalg-batch-matmul-benchmark.json

        2025-05-26 21:37:50 UTC

        +

        deeplearning/dl-op-linalg-batch-matmul-benchmark.json

        2025-05-26 21:43:11 UTC

        dl-op-linalg-batch-matmul-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        BM_SUBF_SCALAR0.00.023979
        diff --git a/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html b/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html index 4b1930c2..678e86e5 100644 --- a/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html +++ b/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.json

        2025-05-26 21:37:50 UTC

        +

        deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.json

        2025-05-26 21:43:11 UTC

        dl-op-linalg-conv2d-nchw-fchw-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        DL_OPS_BATCH_MATMUL/Scalar/iterations:13551.43551.01
        diff --git a/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html b/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html index 3ec8399e..cedc4a37 100644 --- a/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html +++ b/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json

        2025-05-26 21:37:50 UTC

        +

        deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json

        2025-05-26 21:43:11 UTC

        dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        BM_Conv2DNchwFchw_SCALAR282.0282.02
        diff --git a/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html b/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html index 11ea1d0e..9a2fb8a3 100644 --- a/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json

        2025-05-26 21:37:50 UTC

        +

        deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json

        2025-05-26 21:43:11 UTC

        dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        DL_OPS_CONV_2D_NHWC_FHWC/scalar/iterations:573.773.65
        diff --git a/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html b/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html index beda2924..2141213d 100644 --- a/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html +++ b/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json

        2025-05-26 21:37:50 UTC

        +

        deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json

        2025-05-26 21:43:11 UTC

        dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        BM_CONV_2D_NHWC_HWCF_SCALAR32.632.622
        diff --git a/site/deeplearning/dl-op-linalg-mathexp-benchmark.html b/site/deeplearning/dl-op-linalg-mathexp-benchmark.html index 5eb2b117..83c7bcb6 100644 --- a/site/deeplearning/dl-op-linalg-mathexp-benchmark.html +++ b/site/deeplearning/dl-op-linalg-mathexp-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-op-linalg-mathexp-benchmark.json

        2025-05-26 21:37:50 UTC

        +

        deeplearning/dl-op-linalg-mathexp-benchmark.json

        2025-05-26 21:43:11 UTC

        dl-op-linalg-mathexp-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/scalar/iterations:54.84.85
        diff --git a/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html b/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html index cadf7bdf..dcff453f 100644 --- a/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html +++ b/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-op-linalg-mathfpow-benchmark.json

        2025-05-26 21:37:50 UTC

        +

        deeplearning/dl-op-linalg-mathfpow-benchmark.json

        2025-05-26 21:43:11 UTC

        dl-op-linalg-mathfpow-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        BM_EXP_SCALAR0.00.015289
        diff --git a/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html b/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html index cd3f4c39..d99a58a2 100644 --- a/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html +++ b/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-op-linalg-mathrsqrt-benchmark.json

        2025-05-26 21:37:50 UTC

        +

        deeplearning/dl-op-linalg-mathrsqrt-benchmark.json

        2025-05-26 21:43:11 UTC

        dl-op-linalg-mathrsqrt-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        BM_FPOW_SCALAR0.10.18096
        diff --git a/site/deeplearning/dl-op-linalg-matmul-benchmark.html b/site/deeplearning/dl-op-linalg-matmul-benchmark.html index 9cc4a3dd..0bc90d03 100644 --- a/site/deeplearning/dl-op-linalg-matmul-benchmark.html +++ b/site/deeplearning/dl-op-linalg-matmul-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-op-linalg-matmul-benchmark.json

        2025-05-26 21:37:50 UTC

        +

        deeplearning/dl-op-linalg-matmul-benchmark.json

        2025-05-26 21:43:11 UTC

        dl-op-linalg-matmul-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        BM_RSQRT_SCALAR0.10.19407
        diff --git a/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html b/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html index 1dbda987..e0511cfa 100644 --- a/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html +++ b/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.json

        2025-05-26 21:37:50 UTC

        +

        deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.json

        2025-05-26 21:43:11 UTC

        dl-op-linalg-pooling-nhwc-sum-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        DL_OPS_MATMUL/scalar_O0/iterations:14219.84219.71
        diff --git a/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html b/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html index 1e310950..a94b9590 100644 --- a/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html +++ b/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.json

        2025-05-26 21:37:50 UTC

        +

        deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.json

        2025-05-26 21:43:11 UTC

        dl-op-linalg-softmax-exp-sum-div-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        BM_POOLING_NHWC_SUM_SCALAR0.20.22972
        diff --git a/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html b/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html index 8970b567..95bbf8c3 100644 --- a/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html +++ b/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-op-matmul-transpose-b-benchmark.json

        2025-05-26 21:37:50 UTC

        +

        deeplearning/dl-op-matmul-transpose-b-benchmark.json

        2025-05-26 21:43:11 UTC

        dl-op-matmul-transpose-b-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        BM_SOFTMAXEXPSUMDIV_SCALAR0.00.0125501
        diff --git a/site/deeplearning/dl-op-tosa-transpose-benchmark.html b/site/deeplearning/dl-op-tosa-transpose-benchmark.html index fb55008f..542b1776 100644 --- a/site/deeplearning/dl-op-tosa-transpose-benchmark.html +++ b/site/deeplearning/dl-op-tosa-transpose-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-op-tosa-transpose-benchmark.json

        2025-05-26 21:37:50 UTC

        +

        deeplearning/dl-op-tosa-transpose-benchmark.json

        2025-05-26 21:43:11 UTC

        dl-op-tosa-transpose-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        DL_OPS_MATMUL_TRANSPOSE_B/scalar_O0/iterations:51265.31263.65
        diff --git a/site/vectorization/vectorization_matrix.html b/site/vectorization/vectorization_matrix.html index b9c720be..2e73bd2f 100644 --- a/site/vectorization/vectorization_matrix.html +++ b/site/vectorization/vectorization_matrix.html @@ -9,8 +9,8 @@ summary{font-weight:600;cursor:pointer} -

        vectorization/vectorization_matrix.json

        2025-05-26 21:37:50 UTC

        +

        vectorization/vectorization_matrix.json

        2025-05-26 21:43:11 UTC

        vectorization_matrix.json

        NameTime (ns)CPU (ns)Iterations
        DL_OPS_TRANSPOSE_2D/scalar_O0/iterations:529.720.95
        - -
        NameTime (ns)CPU (ns)Iterations
        MLIR_MatMul/119.619.636524480
        MLIR_MatVec/121.321.332313641
        \ No newline at end of file +MLIR_MatMul/119.519.535685429 +MLIR_MatVec/121.421.433101404 \ No newline at end of file diff --git a/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json new file mode 100644 index 00000000..e9cc3462 --- /dev/null +++ b/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json @@ -0,0 +1,348 @@ +{ + "context": { + "date": "2025-05-26T22:07:48+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./bin/image-processing-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [2.08887,1.48682,3.5083], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "Eigen_Convolve2D/1", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Eigen_Convolve2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 140, + "real_time": 4.9964816575603823e+00, + "cpu_time": 4.9958225071428561e+00, + "time_unit": "ms" + }, + { + "name": "MLIR_Conv2D/1", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "MLIR_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 95, + "real_time": 7.3868563496752788e+00, + "cpu_time": 7.3858671157894733e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Conv2D/1", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "Buddy_Conv2D/1", + "run_type": "iteration", + 
"repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1443, + "real_time": 5.0951387766782230e-01, + "cpu_time": 5.0944817117117125e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Corr2D_Constant_Padding/1", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "Buddy_Corr2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 868, + "real_time": 8.0464044285397374e-01, + "cpu_time": 8.0453992165898691e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Filter2D_Constant_Padding/1", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "OpenCV_Filter2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 547, + "real_time": 1.2804496032228418e+00, + "cpu_time": 1.2802703528336392e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 5, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4810, + "real_time": 1.4675747014566667e-01, + "cpu_time": 1.4673347920997926e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2572, + "real_time": 2.7049120086857914e-01, + "cpu_time": 2.7039851360808687e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 7, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 98744, + 
"real_time": 6.8054141457585182e-03, + "cpu_time": 6.8044771530422123e-03, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "family_index": 8, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 46694, + "real_time": 1.4306057277887686e-02, + "cpu_time": 1.4305728487600131e-02, + "time_unit": "ms" + }, + { + "name": "Buddy_Erosion2D_Constant_Padding/1", + "family_index": 9, + "per_family_instance_index": 0, + "run_name": "Buddy_Erosion2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3144, + "real_time": 2.2463264469882005e-01, + "cpu_time": 2.2462909382951662e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Dilation2D_Constant_Padding/1", + "family_index": 10, + "per_family_instance_index": 0, + "run_name": "Buddy_Dilation2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3072, + "real_time": 2.2357795508772446e-01, + "cpu_time": 2.2354433854166664e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Opening2D_Constant_Padding/1", + "family_index": 11, + "per_family_instance_index": 0, + "run_name": "Buddy_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1655, + "real_time": 4.0703193253620873e-01, + "cpu_time": 4.0702098006042287e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Closing2D_Constant_Padding/1", + "family_index": 12, + "per_family_instance_index": 0, + "run_name": "Buddy_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1766, + "real_time": 3.7941525744048549e-01, + "cpu_time": 3.7936569309173185e-01, + "time_unit": "ms" + }, + { + "name": 
"Buddy_TopHat2D_Constant_Padding/1", + "family_index": 13, + "per_family_instance_index": 0, + "run_name": "Buddy_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 721, + "real_time": 8.9330576236221559e-01, + "cpu_time": 8.9318446185852929e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_BottomHat2D_Constant_Padding/1", + "family_index": 14, + "per_family_instance_index": 0, + "run_name": "Buddy_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 724, + "real_time": 9.3049459282864522e-01, + "cpu_time": 9.3035631767955873e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Erode2D_Constant_Padding/1", + "family_index": 15, + "per_family_instance_index": 0, + "run_name": "OpenCV_Erode2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 5029, + "real_time": 1.3920862829917513e-01, + "cpu_time": 1.3919029707695360e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Opening2D_Constant_Padding/1", + "family_index": 16, + "per_family_instance_index": 0, + "run_name": "OpenCV_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3135, + "real_time": 2.2619957939099278e-01, + "cpu_time": 2.2619461626794310e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Closing2D_Constant_Padding/1", + "family_index": 17, + "per_family_instance_index": 0, + "run_name": "OpenCV_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3062, + "real_time": 2.2434361965184427e-01, + "cpu_time": 2.2434184650555231e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_TopHat2D_Constant_Padding/1", + "family_index": 18, + "per_family_instance_index": 0, + "run_name": 
"OpenCV_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2642, + "real_time": 2.6697811249708425e-01, + "cpu_time": 2.6697067903103733e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_BottomHat2D_Constant_Padding/1", + "family_index": 19, + "per_family_instance_index": 0, + "run_name": "OpenCV_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2674, + "real_time": 2.6228858078582662e-01, + "cpu_time": 2.6228528160059844e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "family_index": 20, + "per_family_instance_index": 0, + "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2688, + "real_time": 2.6034137824483750e-01, + "cpu_time": 2.6033733519345176e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Dilate2D_Constant_Padding/1", + "family_index": 21, + "per_family_instance_index": 0, + "run_name": "OpenCV_Dilate2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4937, + "real_time": 1.4531036511784939e-01, + "cpu_time": 1.4529212335426331e-01, + "time_unit": "ms" + } + ] +} diff --git a/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json new file mode 100644 index 00000000..a1c3c244 --- /dev/null +++ b/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json @@ -0,0 +1,164 @@ +{ + "context": { + "date": "2025-05-26T22:08:12+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./bin/image-processing-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": 
true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [2.0625,1.521,3.4751], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "Eigen_Convolve2D/1", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Eigen_Convolve2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 137, + "real_time": 5.1337036229398132e+00, + "cpu_time": 5.1329572627737230e+00, + "time_unit": "ms" + }, + { + "name": "MLIR_Conv2D/1", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "MLIR_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 92, + "real_time": 7.5545981728836242e+00, + "cpu_time": 7.5545316195652177e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Conv2D/1", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "Buddy_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1241, + "real_time": 5.9495814967117033e-01, + "cpu_time": 5.9494254391619683e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Corr2D_Constant_Padding/1", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "Buddy_Corr2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 864, + "real_time": 8.1512168549967035e-01, + "cpu_time": 8.1501009375000022e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Filter2D_Constant_Padding/1", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "OpenCV_Filter2D_Constant_Padding/1", + "run_type": "iteration", + 
"repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 548, + "real_time": 1.2782183135893659e+00, + "cpu_time": 1.2780402226277368e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 5, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4723, + "real_time": 1.4875183663213251e-01, + "cpu_time": 1.4873103324158377e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2619, + "real_time": 2.6808699088703819e-01, + "cpu_time": 2.6804946315387551e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 7, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 103122, + "real_time": 6.7839405446529655e-03, + "cpu_time": 6.7830443067434720e-03, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "family_index": 8, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 46927, + "real_time": 1.4897604358085247e-02, + "cpu_time": 1.4895712084727326e-02, + "time_unit": "ms" + } \ No newline at end of file diff --git a/test_result/imageprocessing/image-processing-result.log b/test_result/imageprocessing/image-processing-result.log index 1326e727..7d6038f0 100644 --- a/test_result/imageprocessing/image-processing-result.log 
+++ b/test_result/imageprocessing/image-processing-result.log @@ -1,179 +1,41 @@ -Benchmark results - Mon May 26 21:31:02 UTC 2025 +Benchmark results - Mon May 26 22:07:47 UTC 2025 Testing SSE support SSE is supported. Running image-processing-benchmark for SSE Running: ../benchmarks/ImageProcessing/Images/YuTu.png prewittKernelAlign random3x3KernelAlignInt CONSTANT_PADDING -terminate called after throwing an instance of 'std::invalid_argument' - what(): Wrong format of command line arguments. -Correct format is ./image-processing-benchmark - where image path provides path of the image to be processed, kernel name denotes the name of desired kernel as specified in kernelmorph denotes the kernel to be used for morphological operationsinclude/ImageProcessing/Kernels.h and Boundary options available are CONSTANT_PADDING, REPLICATE_PADDING. - +2025-05-26T22:07:48+00:00 +Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 2.09, 1.49, 3.51 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
+-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 5.00 ms 5.00 ms 140 +MLIR_Conv2D/1 7.39 ms 7.39 ms 95 +Buddy_Conv2D/1 0.510 ms 0.509 ms 1443 +Buddy_Corr2D_Constant_Padding/1 0.805 ms 0.805 ms 868 +OpenCV_Filter2D_Constant_Padding/1 1.28 ms 1.28 ms 547 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.147 ms 0.147 ms 4810 +Buddy_Resize2D_Bilinear_Interpolation/1 0.270 ms 0.270 ms 2572 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 98744 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 46694 +Buddy_Erosion2D_Constant_Padding/1 0.225 ms 0.225 ms 3144 +Buddy_Dilation2D_Constant_Padding/1 0.224 ms 0.224 ms 3072 +Buddy_Opening2D_Constant_Padding/1 0.407 ms 0.407 ms 1655 +Buddy_Closing2D_Constant_Padding/1 0.379 ms 0.379 ms 1766 +Buddy_TopHat2D_Constant_Padding/1 0.893 ms 0.893 ms 721 +Buddy_BottomHat2D_Constant_Padding/1 0.930 ms 0.930 ms 724 +OpenCV_Erode2D_Constant_Padding/1 0.139 ms 0.139 ms 5029 +OpenCV_Opening2D_Constant_Padding/1 0.226 ms 0.226 ms 3135 +OpenCV_Closing2D_Constant_Padding/1 0.224 ms 0.224 ms 3062 +OpenCV_TopHat2D_Constant_Padding/1 0.267 ms 0.267 ms 2642 +OpenCV_BottomHat2D_Constant_Padding/1 0.262 ms 0.262 ms 2674 +OpenCV_MorphGrad2D_Constant_Padding/1 0.260 ms 0.260 ms 2688 +OpenCV_Dilate2D_Constant_Padding/1 0.145 ms 0.145 ms 4937 Running: ../benchmarks/ImageProcessing/Images/YuTu.png prewittKernelAlign random3x3KernelAlignInt REPLICATE_PADDING -terminate called after throwing an instance of 'std::invalid_argument' - what(): Wrong format of command line arguments. 
-Correct format is ./image-processing-benchmark - where image path provides path of the image to be processed, kernel name denotes the name of desired kernel as specified in kernelmorph denotes the kernel to be used for morphological operationsinclude/ImageProcessing/Kernels.h and Boundary options available are CONSTANT_PADDING, REPLICATE_PADDING. - -Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel3x3KernelAlign random3x3KernelAlignInt CONSTANT_PADDING -terminate called after throwing an instance of 'std::invalid_argument' - what(): Wrong format of command line arguments. -Correct format is ./image-processing-benchmark - where image path provides path of the image to be processed, kernel name denotes the name of desired kernel as specified in kernelmorph denotes the kernel to be used for morphological operationsinclude/ImageProcessing/Kernels.h and Boundary options available are CONSTANT_PADDING, REPLICATE_PADDING. - -Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel3x3KernelAlign random3x3KernelAlignInt REPLICATE_PADDING -terminate called after throwing an instance of 'std::invalid_argument' - what(): Wrong format of command line arguments. -Correct format is ./image-processing-benchmark - where image path provides path of the image to be processed, kernel name denotes the name of desired kernel as specified in kernelmorph denotes the kernel to be used for morphological operationsinclude/ImageProcessing/Kernels.h and Boundary options available are CONSTANT_PADDING, REPLICATE_PADDING. - -Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel5x5KernelAlign random3x3KernelAlignInt CONSTANT_PADDING -terminate called after throwing an instance of 'std::invalid_argument' - what(): Wrong format of command line arguments. 
-Correct format is ./image-processing-benchmark - where image path provides path of the image to be processed, kernel name denotes the name of desired kernel as specified in kernelmorph denotes the kernel to be used for morphological operationsinclude/ImageProcessing/Kernels.h and Boundary options available are CONSTANT_PADDING, REPLICATE_PADDING. - -Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel5x5KernelAlign random3x3KernelAlignInt REPLICATE_PADDING -terminate called after throwing an instance of 'std::invalid_argument' - what(): Wrong format of command line arguments. -Correct format is ./image-processing-benchmark - where image path provides path of the image to be processed, kernel name denotes the name of desired kernel as specified in kernelmorph denotes the kernel to be used for morphological operationsinclude/ImageProcessing/Kernels.h and Boundary options available are CONSTANT_PADDING, REPLICATE_PADDING. - -Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel7x7KernelAlign random3x3KernelAlignInt CONSTANT_PADDING -terminate called after throwing an instance of 'std::invalid_argument' - what(): Wrong format of command line arguments. -Correct format is ./image-processing-benchmark - where image path provides path of the image to be processed, kernel name denotes the name of desired kernel as specified in kernelmorph denotes the kernel to be used for morphological operationsinclude/ImageProcessing/Kernels.h and Boundary options available are CONSTANT_PADDING, REPLICATE_PADDING. - -Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel7x7KernelAlign random3x3KernelAlignInt REPLICATE_PADDING -terminate called after throwing an instance of 'std::invalid_argument' - what(): Wrong format of command line arguments. 
-Correct format is ./image-processing-benchmark - where image path provides path of the image to be processed, kernel name denotes the name of desired kernel as specified in kernelmorph denotes the kernel to be used for morphological operationsinclude/ImageProcessing/Kernels.h and Boundary options available are CONSTANT_PADDING, REPLICATE_PADDING. - -Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel9x9KernelAlign random3x3KernelAlignInt CONSTANT_PADDING -terminate called after throwing an instance of 'std::invalid_argument' - what(): Wrong format of command line arguments. -Correct format is ./image-processing-benchmark - where image path provides path of the image to be processed, kernel name denotes the name of desired kernel as specified in kernelmorph denotes the kernel to be used for morphological operationsinclude/ImageProcessing/Kernels.h and Boundary options available are CONSTANT_PADDING, REPLICATE_PADDING. - -Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel9x9KernelAlign random3x3KernelAlignInt REPLICATE_PADDING -terminate called after throwing an instance of 'std::invalid_argument' - what(): Wrong format of command line arguments. -Correct format is ./image-processing-benchmark - where image path provides path of the image to be processed, kernel name denotes the name of desired kernel as specified in kernelmorph denotes the kernel to be used for morphological operationsinclude/ImageProcessing/Kernels.h and Boundary options available are CONSTANT_PADDING, REPLICATE_PADDING. - -Running: ../benchmarks/ImageProcessing/Images/YuTu.png laplacianKernelAlign random3x3KernelAlignInt CONSTANT_PADDING -terminate called after throwing an instance of 'std::invalid_argument' - what(): Wrong format of command line arguments. 
-Correct format is ./image-processing-benchmark - where image path provides path of the image to be processed, kernel name denotes the name of desired kernel as specified in kernelmorph denotes the kernel to be used for morphological operationsinclude/ImageProcessing/Kernels.h and Boundary options available are CONSTANT_PADDING, REPLICATE_PADDING. - -Running: ../benchmarks/ImageProcessing/Images/YuTu.png laplacianKernelAlign random3x3KernelAlignInt REPLICATE_PADDING -terminate called after throwing an instance of 'std::invalid_argument' - what(): Wrong format of command line arguments. -Correct format is ./image-processing-benchmark - where image path provides path of the image to be processed, kernel name denotes the name of desired kernel as specified in kernelmorph denotes the kernel to be used for morphological operationsinclude/ImageProcessing/Kernels.h and Boundary options available are CONSTANT_PADDING, REPLICATE_PADDING. - -Running: ../benchmarks/ImageProcessing/Images/YuTu.png logKernelAlign random3x3KernelAlignInt CONSTANT_PADDING -terminate called after throwing an instance of 'std::invalid_argument' - what(): Wrong format of command line arguments. -Correct format is ./image-processing-benchmark - where image path provides path of the image to be processed, kernel name denotes the name of desired kernel as specified in kernelmorph denotes the kernel to be used for morphological operationsinclude/ImageProcessing/Kernels.h and Boundary options available are CONSTANT_PADDING, REPLICATE_PADDING. - -Running: ../benchmarks/ImageProcessing/Images/YuTu.png logKernelAlign random3x3KernelAlignInt REPLICATE_PADDING -terminate called after throwing an instance of 'std::invalid_argument' - what(): Wrong format of command line arguments. 
-Correct format is ./image-processing-benchmark - where image path provides path of the image to be processed, kernel name denotes the name of desired kernel as specified in kernelmorph denotes the kernel to be used for morphological operationsinclude/ImageProcessing/Kernels.h and Boundary options available are CONSTANT_PADDING, REPLICATE_PADDING. - -Testing AVX2 support -AVX2 is supported. -Running image-processing-benchmark for AVX2 -Running: ../benchmarks/ImageProcessing/Images/YuTu.png prewittKernelAlign random3x3KernelAlignInt CONSTANT_PADDING -terminate called after throwing an instance of 'std::invalid_argument' - what(): Wrong format of command line arguments. -Correct format is ./image-processing-benchmark - where image path provides path of the image to be processed, kernel name denotes the name of desired kernel as specified in kernelmorph denotes the kernel to be used for morphological operationsinclude/ImageProcessing/Kernels.h and Boundary options available are CONSTANT_PADDING, REPLICATE_PADDING. - -Running: ../benchmarks/ImageProcessing/Images/YuTu.png prewittKernelAlign random3x3KernelAlignInt REPLICATE_PADDING -terminate called after throwing an instance of 'std::invalid_argument' - what(): Wrong format of command line arguments. -Correct format is ./image-processing-benchmark - where image path provides path of the image to be processed, kernel name denotes the name of desired kernel as specified in kernelmorph denotes the kernel to be used for morphological operationsinclude/ImageProcessing/Kernels.h and Boundary options available are CONSTANT_PADDING, REPLICATE_PADDING. - -Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel3x3KernelAlign random3x3KernelAlignInt CONSTANT_PADDING -terminate called after throwing an instance of 'std::invalid_argument' - what(): Wrong format of command line arguments. 
-Correct format is ./image-processing-benchmark - where image path provides path of the image to be processed, kernel name denotes the name of desired kernel as specified in kernelmorph denotes the kernel to be used for morphological operationsinclude/ImageProcessing/Kernels.h and Boundary options available are CONSTANT_PADDING, REPLICATE_PADDING. - -Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel3x3KernelAlign random3x3KernelAlignInt REPLICATE_PADDING -terminate called after throwing an instance of 'std::invalid_argument' - what(): Wrong format of command line arguments. -Correct format is ./image-processing-benchmark - where image path provides path of the image to be processed, kernel name denotes the name of desired kernel as specified in kernelmorph denotes the kernel to be used for morphological operationsinclude/ImageProcessing/Kernels.h and Boundary options available are CONSTANT_PADDING, REPLICATE_PADDING. - -Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel5x5KernelAlign random3x3KernelAlignInt CONSTANT_PADDING -terminate called after throwing an instance of 'std::invalid_argument' - what(): Wrong format of command line arguments. -Correct format is ./image-processing-benchmark - where image path provides path of the image to be processed, kernel name denotes the name of desired kernel as specified in kernelmorph denotes the kernel to be used for morphological operationsinclude/ImageProcessing/Kernels.h and Boundary options available are CONSTANT_PADDING, REPLICATE_PADDING. - -Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel5x5KernelAlign random3x3KernelAlignInt REPLICATE_PADDING -terminate called after throwing an instance of 'std::invalid_argument' - what(): Wrong format of command line arguments. 
-Correct format is ./image-processing-benchmark - where image path provides path of the image to be processed, kernel name denotes the name of desired kernel as specified in kernelmorph denotes the kernel to be used for morphological operationsinclude/ImageProcessing/Kernels.h and Boundary options available are CONSTANT_PADDING, REPLICATE_PADDING. - -Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel7x7KernelAlign random3x3KernelAlignInt CONSTANT_PADDING -terminate called after throwing an instance of 'std::invalid_argument' - what(): Wrong format of command line arguments. -Correct format is ./image-processing-benchmark - where image path provides path of the image to be processed, kernel name denotes the name of desired kernel as specified in kernelmorph denotes the kernel to be used for morphological operationsinclude/ImageProcessing/Kernels.h and Boundary options available are CONSTANT_PADDING, REPLICATE_PADDING. - -Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel7x7KernelAlign random3x3KernelAlignInt REPLICATE_PADDING -terminate called after throwing an instance of 'std::invalid_argument' - what(): Wrong format of command line arguments. -Correct format is ./image-processing-benchmark - where image path provides path of the image to be processed, kernel name denotes the name of desired kernel as specified in kernelmorph denotes the kernel to be used for morphological operationsinclude/ImageProcessing/Kernels.h and Boundary options available are CONSTANT_PADDING, REPLICATE_PADDING. - -Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel9x9KernelAlign random3x3KernelAlignInt CONSTANT_PADDING -terminate called after throwing an instance of 'std::invalid_argument' - what(): Wrong format of command line arguments. 
-Correct format is ./image-processing-benchmark - where image path provides path of the image to be processed, kernel name denotes the name of desired kernel as specified in kernelmorph denotes the kernel to be used for morphological operationsinclude/ImageProcessing/Kernels.h and Boundary options available are CONSTANT_PADDING, REPLICATE_PADDING. - -Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel9x9KernelAlign random3x3KernelAlignInt REPLICATE_PADDING -terminate called after throwing an instance of 'std::invalid_argument' - what(): Wrong format of command line arguments. -Correct format is ./image-processing-benchmark - where image path provides path of the image to be processed, kernel name denotes the name of desired kernel as specified in kernelmorph denotes the kernel to be used for morphological operationsinclude/ImageProcessing/Kernels.h and Boundary options available are CONSTANT_PADDING, REPLICATE_PADDING. - -Running: ../benchmarks/ImageProcessing/Images/YuTu.png laplacianKernelAlign random3x3KernelAlignInt CONSTANT_PADDING -terminate called after throwing an instance of 'std::invalid_argument' - what(): Wrong format of command line arguments. -Correct format is ./image-processing-benchmark - where image path provides path of the image to be processed, kernel name denotes the name of desired kernel as specified in kernelmorph denotes the kernel to be used for morphological operationsinclude/ImageProcessing/Kernels.h and Boundary options available are CONSTANT_PADDING, REPLICATE_PADDING. - -Running: ../benchmarks/ImageProcessing/Images/YuTu.png laplacianKernelAlign random3x3KernelAlignInt REPLICATE_PADDING -terminate called after throwing an instance of 'std::invalid_argument' - what(): Wrong format of command line arguments. 
-Correct format is ./image-processing-benchmark - where image path provides path of the image to be processed, kernel name denotes the name of desired kernel as specified in kernelmorph denotes the kernel to be used for morphological operationsinclude/ImageProcessing/Kernels.h and Boundary options available are CONSTANT_PADDING, REPLICATE_PADDING. - -Running: ../benchmarks/ImageProcessing/Images/YuTu.png logKernelAlign random3x3KernelAlignInt CONSTANT_PADDING -terminate called after throwing an instance of 'std::invalid_argument' - what(): Wrong format of command line arguments. -Correct format is ./image-processing-benchmark - where image path provides path of the image to be processed, kernel name denotes the name of desired kernel as specified in kernelmorph denotes the kernel to be used for morphological operationsinclude/ImageProcessing/Kernels.h and Boundary options available are CONSTANT_PADDING, REPLICATE_PADDING. - -Running: ../benchmarks/ImageProcessing/Images/YuTu.png logKernelAlign random3x3KernelAlignInt REPLICATE_PADDING -terminate called after throwing an instance of 'std::invalid_argument' - what(): Wrong format of command line arguments. -Correct format is ./image-processing-benchmark - where image path provides path of the image to be processed, kernel name denotes the name of desired kernel as specified in kernelmorph denotes the kernel to be used for morphological operationsinclude/ImageProcessing/Kernels.h and Boundary options available are CONSTANT_PADDING, REPLICATE_PADDING. - -Testing AVX512 support -CPU does not support AVX512. -Testing NEON support -CPU does not support NEON. 
diff --git a/test_result/vectorization/vectorization_matrix.json b/test_result/vectorization/vectorization_matrix.json index 994891e0..a25f391c 100644 --- a/test_result/vectorization/vectorization_matrix.json +++ b/test_result/vectorization/vectorization_matrix.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-05-26T21:31:26+00:00", + "date": "2025-05-26T21:43:08+00:00", "host_name": "4ed4bacfe45d", "executable": "./vectorization-matrix-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [17.2627,16.4761,8.59082], + "load_avg": [17.3032,18.9692,12.2134], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 36524480, - "real_time": 1.9617160629438615e+01, - "cpu_time": 1.9616760320749265e+01, + "iterations": 35685429, + "real_time": 1.9466575990653187e+01, + "cpu_time": 1.9465874124702275e+01, "time_unit": "ns" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 32313641, - "real_time": 2.1252938182361376e+01, - "cpu_time": 2.1252621114408001e+01, + "iterations": 33101404, + "real_time": 2.1422448062752832e+01, + "cpu_time": 2.1422267405938427e+01, "time_unit": "ns" } ] diff --git a/test_result/vectorization/vectorization_result.log b/test_result/vectorization/vectorization_result.log index f6bdcb35..76a41ad1 100755 --- a/test_result/vectorization/vectorization_result.log +++ b/test_result/vectorization/vectorization_result.log @@ -1,4 +1,4 @@ -Vectorization Benchmark - Mon May 26 21:31:20 UTC 2025 +Vectorization Benchmark - Mon May 26 21:43:02 UTC 2025 [Info] Starting vectorization-matrix-benchmark build... [Info] Running CMake configuration... 
-- The CXX compiler identification is GNU 11.4.0 @@ -128,23 +128,23 @@ Call Stack (most recent call first): -- Generating done -- Build files have been written to: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/src/project_googlebenchmark-build [10/17] Performing build step for 'project_googlebenchmark' -[1/22] Building CXX object src/CMakeFiles/benchmark_main.dir/benchmark_main.cc.o -[2/22] Building CXX object src/CMakeFiles/benchmark.dir/colorprint.cc.o -[3/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark_name.cc.o -[4/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark_api_internal.cc.o -[5/22] Building CXX object src/CMakeFiles/benchmark.dir/sleep.cc.o -[6/22] Building CXX object src/CMakeFiles/benchmark.dir/perf_counters.cc.o +[1/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark_name.cc.o +[2/22] Building CXX object src/CMakeFiles/benchmark_main.dir/benchmark_main.cc.o +[3/22] Building CXX object src/CMakeFiles/benchmark.dir/sleep.cc.o +[4/22] Building CXX object src/CMakeFiles/benchmark.dir/perf_counters.cc.o +[5/22] Building CXX object src/CMakeFiles/benchmark.dir/reporter.cc.o +[6/22] Building CXX object src/CMakeFiles/benchmark.dir/colorprint.cc.o [7/22] Building CXX object src/CMakeFiles/benchmark.dir/counter.cc.o -[8/22] Building CXX object src/CMakeFiles/benchmark.dir/string_util.cc.o -[9/22] Building CXX object src/CMakeFiles/benchmark.dir/timers.cc.o -[10/22] Building CXX object src/CMakeFiles/benchmark.dir/reporter.cc.o -[11/22] Building CXX object src/CMakeFiles/benchmark.dir/commandlineflags.cc.o -[12/22] Building CXX object src/CMakeFiles/benchmark.dir/console_reporter.cc.o -[13/22] Building CXX object src/CMakeFiles/benchmark.dir/csv_reporter.cc.o -[14/22] Building CXX object src/CMakeFiles/benchmark.dir/json_reporter.cc.o -[15/22] Building CXX object src/CMakeFiles/benchmark.dir/sysinfo.cc.o -[16/22] Building CXX object src/CMakeFiles/benchmark.dir/complexity.cc.o -[17/22] Building CXX 
object src/CMakeFiles/benchmark.dir/statistics.cc.o +[8/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark_api_internal.cc.o +[9/22] Building CXX object src/CMakeFiles/benchmark.dir/string_util.cc.o +[10/22] Building CXX object src/CMakeFiles/benchmark.dir/timers.cc.o +[11/22] Building CXX object src/CMakeFiles/benchmark.dir/console_reporter.cc.o +[12/22] Building CXX object src/CMakeFiles/benchmark.dir/complexity.cc.o +[13/22] Building CXX object src/CMakeFiles/benchmark.dir/commandlineflags.cc.o +[14/22] Building CXX object src/CMakeFiles/benchmark.dir/csv_reporter.cc.o +[15/22] Building CXX object src/CMakeFiles/benchmark.dir/json_reporter.cc.o +[16/22] Building CXX object src/CMakeFiles/benchmark.dir/statistics.cc.o +[17/22] Building CXX object src/CMakeFiles/benchmark.dir/sysinfo.cc.o [18/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark_runner.cc.o [19/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark.cc.o [20/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark_register.cc.o @@ -176,11 +176,11 @@ Call Stack (most recent call first): [12/17] No test step for 'project_googlebenchmark' [13/17] Completed 'project_googlebenchmark' [14/17] Building CXX object benchmarks/Vectorization/CMakeFiles/vectorization-matrix-benchmark.dir/Main.cpp.o -[15/17] Building CXX object benchmarks/Vectorization/CMakeFiles/vectorization-matrix-benchmark.dir/MLIRMatVecBenchmark.cpp.o -[16/17] Building CXX object benchmarks/Vectorization/CMakeFiles/vectorization-matrix-benchmark.dir/MLIRMatMulBenchmark.cpp.o +[15/17] Building CXX object benchmarks/Vectorization/CMakeFiles/vectorization-matrix-benchmark.dir/MLIRMatMulBenchmark.cpp.o +[16/17] Building CXX object benchmarks/Vectorization/CMakeFiles/vectorization-matrix-benchmark.dir/MLIRMatVecBenchmark.cpp.o [17/17] Linking CXX executable bin/vectorization-matrix-benchmark [Info] Running vectorization-matrix-benchmark... 
-2025-05-26T21:31:26+00:00 +2025-05-26T21:43:08+00:00 Running ./vectorization-matrix-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -188,13 +188,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 17.26, 16.48, 8.59 +Load Average: 17.30, 18.97, 12.21 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. -------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------- -MLIR_MatMul/1 19.6 ns 19.6 ns 36524480 -MLIR_MatVec/1 21.3 ns 21.3 ns 32313641 +MLIR_MatMul/1 19.5 ns 19.5 ns 35685429 +MLIR_MatVec/1 21.4 ns 21.4 ns 33101404 -------------------------------------------------------- MLIR_MatMul: MLIR MatMul Operation + Nested Loop [ 18 18 18 18 18 18 18 18 18 18 ] From c38f1685e4d732e34ae8937efd81f7184308c526 Mon Sep 17 00:00:00 2001 From: Q Liu <52538137+LIUQyou@users.noreply.github.com> Date: Tue, 27 May 2025 00:26:55 +0200 Subject: [PATCH 20/52] update web --- site/deeplearning/dl-layer-ffn-benchmark.html | 2 +- .../dl-layer-rmsnorm-benchmark.html | 2 +- .../dl-layer-selfattention-benchmark.html | 2 +- .../dl-model-lenet-benchmark.html | 2 +- .../dl-model-mobilenetv3-benchmark.html | 2 +- .../dl-model-resnet18-benchmark.html | 2 +- .../dl-model-tinyllama-benchmark.html | 2 +- .../dl-model-whisper-benchmark.html | 2 +- .../dl-op-linalg-arithaddf-benchmark.html | 2 +- .../dl-op-linalg-arithdivf-benchmark.html | 2 +- .../dl-op-linalg-arithmulf-benchmark.html | 2 +- .../dl-op-linalg-arithnegf-benchmark.html | 2 +- .../dl-op-linalg-arithsubf-benchmark.html | 2 +- .../dl-op-linalg-batch-matmul-benchmark.html | 2 +- ...-op-linalg-conv2d-nchw-fchw-benchmark.html | 2 +- ...-op-linalg-conv2d-nhwc-fhwc-benchmark.html | 2 +- ...-op-linalg-conv2d-nhwc-hwcf-benchmark.html | 2 +- ...-depthwise-conv-2d-nhwc-hwc-benchmark.html | 2 +- .../dl-op-linalg-mathexp-benchmark.html 
| 2 +- .../dl-op-linalg-mathfpow-benchmark.html | 2 +- .../dl-op-linalg-mathrsqrt-benchmark.html | 2 +- .../dl-op-linalg-matmul-benchmark.html | 2 +- ...-op-linalg-pooling-nhwc-sum-benchmark.html | 2 +- ...-linalg-softmax-exp-sum-div-benchmark.html | 2 +- .../dl-op-matmul-transpose-b-benchmark.html | 2 +- .../dl-op-tosa-transpose-benchmark.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 36 + ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 36 + ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 36 + ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 36 + ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 36 + ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 36 + ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 36 + ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 36 + ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 36 + ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 36 + ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 36 + ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 36 + ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 36 + ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 36 + ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 36 + ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 36 + ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 36 + ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 36 + ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 36 + ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 36 + ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 36 + ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 36 + ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 36 + ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 36 + ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 36 + site/index.html | 39 - site/vectorization/vectorization_matrix.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.json | 348 ++++++ ...om3x3KernelAlignInt_REPLICATE_PADDING.json | 80 ++ ...dom3x3KernelAlignInt_CONSTANT_PADDING.json | 348 ++++++ ...om3x3KernelAlignInt_REPLICATE_PADDING.json | 348 ++++++ 
...dom3x3KernelAlignInt_CONSTANT_PADDING.json | 348 ++++++ ...om3x3KernelAlignInt_REPLICATE_PADDING.json | 348 ++++++ ...dom3x3KernelAlignInt_CONSTANT_PADDING.json | 348 ++++++ ...om3x3KernelAlignInt_REPLICATE_PADDING.json | 348 ++++++ ...dom3x3KernelAlignInt_CONSTANT_PADDING.json | 348 ++++++ ...om3x3KernelAlignInt_REPLICATE_PADDING.json | 348 ++++++ ...dom3x3KernelAlignInt_CONSTANT_PADDING.json | 348 ++++++ ...om3x3KernelAlignInt_REPLICATE_PADDING.json | 348 ++++++ ...dom3x3KernelAlignInt_CONSTANT_PADDING.json | 348 ++++++ ...om3x3KernelAlignInt_REPLICATE_PADDING.json | 348 ++++++ ...dom3x3KernelAlignInt_CONSTANT_PADDING.json | 348 ++++++ ...om3x3KernelAlignInt_REPLICATE_PADDING.json | 348 ++++++ ...dom3x3KernelAlignInt_CONSTANT_PADDING.json | 348 ++++++ ...om3x3KernelAlignInt_REPLICATE_PADDING.json | 348 ++++++ ...dom3x3KernelAlignInt_CONSTANT_PADDING.json | 132 +-- ...om3x3KernelAlignInt_REPLICATE_PADDING.json | 244 +++- ...dom3x3KernelAlignInt_CONSTANT_PADDING.json | 348 ++++++ ...om3x3KernelAlignInt_REPLICATE_PADDING.json | 348 ++++++ ...dom3x3KernelAlignInt_CONSTANT_PADDING.json | 348 ++++++ ...om3x3KernelAlignInt_REPLICATE_PADDING.json | 348 ++++++ ...dom3x3KernelAlignInt_CONSTANT_PADDING.json | 348 ++++++ ...om3x3KernelAlignInt_REPLICATE_PADDING.json | 348 ++++++ ...dom3x3KernelAlignInt_CONSTANT_PADDING.json | 348 ++++++ ...om3x3KernelAlignInt_REPLICATE_PADDING.json | 348 ++++++ .../image-processing-result.log | 1006 ++++++++++++++++- 82 files changed, 10970 insertions(+), 185 deletions(-) create mode 100644 site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html create mode 100644 site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html create mode 100644 site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html create mode 100644 site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html create mode 
100644 site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html create mode 100644 site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html create mode 100644 site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html create mode 100644 site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html create mode 100644 site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html create mode 100644 site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html create mode 100644 site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html create mode 100644 site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html create mode 100644 site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html create mode 100644 site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html create mode 100644 site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html create mode 100644 site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html create mode 100644 site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html create mode 100644 site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html create mode 100644 site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html create mode 100644 site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html create mode 100644 site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html create mode 100644 
site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html create mode 100644 site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html create mode 100644 site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html create mode 100644 site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html delete mode 100644 site/index.html create mode 100644 test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json create mode 100644 test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json create mode 100644 test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json create mode 100644 test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json create mode 100644 test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json create mode 100644 test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json create mode 100644 test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json create mode 100644 test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json create mode 100644 test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json create mode 100644 test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json create mode 100644 test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json create mode 100644 test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json create mode 100644 
test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json create mode 100644 test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json create mode 100644 test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json create mode 100644 test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json create mode 100644 test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json create mode 100644 test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json create mode 100644 test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json create mode 100644 test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json create mode 100644 test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json create mode 100644 test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json create mode 100644 test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json create mode 100644 test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json create mode 100644 test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json create mode 100644 test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json diff --git a/site/deeplearning/dl-layer-ffn-benchmark.html b/site/deeplearning/dl-layer-ffn-benchmark.html index 21b83bbd..7d85a5c2 100644 --- a/site/deeplearning/dl-layer-ffn-benchmark.html +++ b/site/deeplearning/dl-layer-ffn-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-layer-ffn-benchmark.json

        2025-05-26 21:43:11 UTC

        +

        deeplearning/dl-layer-ffn-benchmark.json

        2025-05-26 22:24:35 UTC

        dl-layer-ffn-benchmark.json

        diff --git a/site/deeplearning/dl-layer-rmsnorm-benchmark.html b/site/deeplearning/dl-layer-rmsnorm-benchmark.html index abf61481..e128e7b3 100644 --- a/site/deeplearning/dl-layer-rmsnorm-benchmark.html +++ b/site/deeplearning/dl-layer-rmsnorm-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-layer-rmsnorm-benchmark.json

        2025-05-26 21:43:11 UTC

        +

        deeplearning/dl-layer-rmsnorm-benchmark.json

        2025-05-26 22:24:35 UTC

        dl-layer-rmsnorm-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        DL_LAYER_FFN/Scalar0.10.110758
        diff --git a/site/deeplearning/dl-layer-selfattention-benchmark.html b/site/deeplearning/dl-layer-selfattention-benchmark.html index d5f68fdd..0769c15a 100644 --- a/site/deeplearning/dl-layer-selfattention-benchmark.html +++ b/site/deeplearning/dl-layer-selfattention-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-layer-selfattention-benchmark.json

        2025-05-26 21:43:11 UTC

        +

        deeplearning/dl-layer-selfattention-benchmark.json

        2025-05-26 22:24:35 UTC

        dl-layer-selfattention-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        DL_LAYER_RMSNORM/Scalar0.00.0358748
        diff --git a/site/deeplearning/dl-model-lenet-benchmark.html b/site/deeplearning/dl-model-lenet-benchmark.html index 0bcd0c56..447e4139 100644 --- a/site/deeplearning/dl-model-lenet-benchmark.html +++ b/site/deeplearning/dl-model-lenet-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-model-lenet-benchmark.json

        2025-05-26 21:43:11 UTC

        +

        deeplearning/dl-model-lenet-benchmark.json

        2025-05-26 22:24:35 UTC

        dl-model-lenet-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        DL_LAYER_ATTENTION/Scalar4.74.7148
        diff --git a/site/deeplearning/dl-model-mobilenetv3-benchmark.html b/site/deeplearning/dl-model-mobilenetv3-benchmark.html index ee361415..32bba417 100644 --- a/site/deeplearning/dl-model-mobilenetv3-benchmark.html +++ b/site/deeplearning/dl-model-mobilenetv3-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-model-mobilenetv3-benchmark.json

        2025-05-26 21:43:11 UTC

        +

        deeplearning/dl-model-mobilenetv3-benchmark.json

        2025-05-26 22:24:35 UTC

        dl-model-mobilenetv3-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        DL_MODEL_LENET/Auto_Vectorization0.20.24396
        diff --git a/site/deeplearning/dl-model-resnet18-benchmark.html b/site/deeplearning/dl-model-resnet18-benchmark.html index eec5096a..416bf9e6 100644 --- a/site/deeplearning/dl-model-resnet18-benchmark.html +++ b/site/deeplearning/dl-model-resnet18-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-model-resnet18-benchmark.json

        2025-05-26 21:43:11 UTC

        +

        deeplearning/dl-model-resnet18-benchmark.json

        2025-05-26 22:24:35 UTC

        dl-model-resnet18-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        BM_MobileNet_V3/BM_MobileNet_V3_scalar35.335.320
        diff --git a/site/deeplearning/dl-model-tinyllama-benchmark.html b/site/deeplearning/dl-model-tinyllama-benchmark.html index 40c6a975..36702c42 100644 --- a/site/deeplearning/dl-model-tinyllama-benchmark.html +++ b/site/deeplearning/dl-model-tinyllama-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-model-tinyllama-benchmark.json

        2025-05-26 21:43:11 UTC

        +

        deeplearning/dl-model-tinyllama-benchmark.json

        2025-05-26 22:24:35 UTC

        dl-model-tinyllama-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        DL_MODEL_Resnet18/Auto_Vectorization725.8717.91
        diff --git a/site/deeplearning/dl-model-whisper-benchmark.html b/site/deeplearning/dl-model-whisper-benchmark.html index bfb8798d..2d6b6b4d 100644 --- a/site/deeplearning/dl-model-whisper-benchmark.html +++ b/site/deeplearning/dl-model-whisper-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-model-whisper-benchmark.json

        2025-05-26 21:43:11 UTC

        +

        deeplearning/dl-model-whisper-benchmark.json

        2025-05-26 22:24:35 UTC

        dl-model-whisper-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        DL_MODEL_TINYLLAMA/scalar145312.6145306.01
        diff --git a/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html b/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html index 54e2693a..84449327 100644 --- a/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-op-linalg-arithaddf-benchmark.json

        2025-05-26 21:43:11 UTC

        +

        deeplearning/dl-op-linalg-arithaddf-benchmark.json

        2025-05-26 22:24:35 UTC

        dl-op-linalg-arithaddf-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        DL_MODEL_Whisper/Auto_Vectorization80864.480855.31
        diff --git a/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html b/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html index 66b5ae92..36a83ac5 100644 --- a/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-op-linalg-arithdivf-benchmark.json

        2025-05-26 21:43:11 UTC

        +

        deeplearning/dl-op-linalg-arithdivf-benchmark.json

        2025-05-26 22:24:35 UTC

        dl-op-linalg-arithdivf-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        BM_ADDF_SCALAR0.00.023951
        diff --git a/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html b/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html index 4277a79e..43877a2c 100644 --- a/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-op-linalg-arithmulf-benchmark.json

        2025-05-26 21:43:11 UTC

        +

        deeplearning/dl-op-linalg-arithmulf-benchmark.json

        2025-05-26 22:24:35 UTC

        dl-op-linalg-arithmulf-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        BM_DIVF_SCALAR0.00.022508
        diff --git a/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html b/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html index e7b8024f..b6337d45 100644 --- a/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-op-linalg-arithnegf-benchmark.json

        2025-05-26 21:43:11 UTC

        +

        deeplearning/dl-op-linalg-arithnegf-benchmark.json

        2025-05-26 22:24:35 UTC

        dl-op-linalg-arithnegf-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        BM_MULF_SCALAR0.00.023392
        diff --git a/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html b/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html index e4aa1844..45b665ab 100644 --- a/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-op-linalg-arithsubf-benchmark.json

        2025-05-26 21:43:11 UTC

        +

        deeplearning/dl-op-linalg-arithsubf-benchmark.json

        2025-05-26 22:24:35 UTC

        dl-op-linalg-arithsubf-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        BM_NEGF_SCALAR0.00.030765
        diff --git a/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html b/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html index e9f3299c..0d496648 100644 --- a/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html +++ b/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-op-linalg-batch-matmul-benchmark.json

        2025-05-26 21:43:11 UTC

        +

        deeplearning/dl-op-linalg-batch-matmul-benchmark.json

        2025-05-26 22:24:35 UTC

        dl-op-linalg-batch-matmul-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        BM_SUBF_SCALAR0.00.023979
        diff --git a/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html b/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html index 678e86e5..97594737 100644 --- a/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html +++ b/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.json

        2025-05-26 21:43:11 UTC

        +

        deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.json

        2025-05-26 22:24:35 UTC

        dl-op-linalg-conv2d-nchw-fchw-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        DL_OPS_BATCH_MATMUL/Scalar/iterations:13551.43551.01
        diff --git a/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html b/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html index cedc4a37..db78c360 100644 --- a/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html +++ b/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json

        2025-05-26 21:43:11 UTC

        +

        deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json

        2025-05-26 22:24:35 UTC

        dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        BM_Conv2DNchwFchw_SCALAR282.0282.02
        diff --git a/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html b/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html index 9a2fb8a3..1ba3701d 100644 --- a/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json

        2025-05-26 21:43:11 UTC

        +

        deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json

        2025-05-26 22:24:35 UTC

        dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        DL_OPS_CONV_2D_NHWC_FHWC/scalar/iterations:573.773.65
        diff --git a/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html b/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html index 2141213d..00478064 100644 --- a/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html +++ b/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json

        2025-05-26 21:43:11 UTC

        +

        deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json

        2025-05-26 22:24:35 UTC

        dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        BM_CONV_2D_NHWC_HWCF_SCALAR32.632.622
        diff --git a/site/deeplearning/dl-op-linalg-mathexp-benchmark.html b/site/deeplearning/dl-op-linalg-mathexp-benchmark.html index 83c7bcb6..c4bd5ff7 100644 --- a/site/deeplearning/dl-op-linalg-mathexp-benchmark.html +++ b/site/deeplearning/dl-op-linalg-mathexp-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-op-linalg-mathexp-benchmark.json

        2025-05-26 21:43:11 UTC

        +

        deeplearning/dl-op-linalg-mathexp-benchmark.json

        2025-05-26 22:24:35 UTC

        dl-op-linalg-mathexp-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/scalar/iterations:54.84.85
        diff --git a/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html b/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html index dcff453f..6a950c67 100644 --- a/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html +++ b/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-op-linalg-mathfpow-benchmark.json

        2025-05-26 21:43:11 UTC

        +

        deeplearning/dl-op-linalg-mathfpow-benchmark.json

        2025-05-26 22:24:35 UTC

        dl-op-linalg-mathfpow-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        BM_EXP_SCALAR0.00.015289
        diff --git a/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html b/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html index d99a58a2..d00ce7b4 100644 --- a/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html +++ b/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-op-linalg-mathrsqrt-benchmark.json

        2025-05-26 21:43:11 UTC

        +

        deeplearning/dl-op-linalg-mathrsqrt-benchmark.json

        2025-05-26 22:24:35 UTC

        dl-op-linalg-mathrsqrt-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        BM_FPOW_SCALAR0.10.18096
        diff --git a/site/deeplearning/dl-op-linalg-matmul-benchmark.html b/site/deeplearning/dl-op-linalg-matmul-benchmark.html index 0bc90d03..077ca934 100644 --- a/site/deeplearning/dl-op-linalg-matmul-benchmark.html +++ b/site/deeplearning/dl-op-linalg-matmul-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-op-linalg-matmul-benchmark.json

        2025-05-26 21:43:11 UTC

        +

        deeplearning/dl-op-linalg-matmul-benchmark.json

        2025-05-26 22:24:35 UTC

        dl-op-linalg-matmul-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        BM_RSQRT_SCALAR0.10.19407
        diff --git a/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html b/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html index e0511cfa..ff4fd434 100644 --- a/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html +++ b/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.json

        2025-05-26 21:43:11 UTC

        +

        deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.json

        2025-05-26 22:24:35 UTC

        dl-op-linalg-pooling-nhwc-sum-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        DL_OPS_MATMUL/scalar_O0/iterations:14219.84219.71
        diff --git a/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html b/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html index a94b9590..e50403bd 100644 --- a/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html +++ b/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.json

        2025-05-26 21:43:11 UTC

        +

        deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.json

        2025-05-26 22:24:35 UTC

        dl-op-linalg-softmax-exp-sum-div-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        BM_POOLING_NHWC_SUM_SCALAR0.20.22972
        diff --git a/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html b/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html index 95bbf8c3..75862325 100644 --- a/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html +++ b/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-op-matmul-transpose-b-benchmark.json

        2025-05-26 21:43:11 UTC

        +

        deeplearning/dl-op-matmul-transpose-b-benchmark.json

        2025-05-26 22:24:35 UTC

        dl-op-matmul-transpose-b-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        BM_SOFTMAXEXPSUMDIV_SCALAR0.00.0125501
        diff --git a/site/deeplearning/dl-op-tosa-transpose-benchmark.html b/site/deeplearning/dl-op-tosa-transpose-benchmark.html index 542b1776..764741de 100644 --- a/site/deeplearning/dl-op-tosa-transpose-benchmark.html +++ b/site/deeplearning/dl-op-tosa-transpose-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-op-tosa-transpose-benchmark.json

        2025-05-26 21:43:11 UTC

        +

        deeplearning/dl-op-tosa-transpose-benchmark.json

        2025-05-26 22:24:35 UTC

        dl-op-tosa-transpose-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        DL_OPS_MATMUL_TRANSPOSE_B/scalar_O0/iterations:51265.31263.65
        diff --git a/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html new file mode 100644 index 00000000..e0bf812c --- /dev/null +++ b/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -0,0 +1,36 @@ + + + +

        imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:24:35 UTC

        +

        AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        +
        NameTime (ns)CPU (ns)Iterations
        DL_OPS_TRANSPOSE_2D/scalar_O0/iterations:529.720.95
        + + + + + + + + + + + + + + + + + + + + + +
        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/14.94.9144
        MLIR_Conv2D/17.47.495
        Buddy_Conv2D/10.40.41663
        Buddy_Corr2D_Constant_Padding/11.11.1651
        OpenCV_Filter2D_Constant_Padding/11.91.9367
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.10.14748
        Buddy_Resize2D_Bilinear_Interpolation/10.30.32632
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.00.0102984
        OpenCV_Resize2D_Bilinear_Interpolation/10.00.048927
        Buddy_Erosion2D_Constant_Padding/10.20.23155
        Buddy_Dilation2D_Constant_Padding/10.20.23149
        Buddy_Opening2D_Constant_Padding/10.40.42068
        Buddy_Closing2D_Constant_Padding/10.40.41957
        Buddy_TopHat2D_Constant_Padding/10.90.9731
        Buddy_BottomHat2D_Constant_Padding/10.90.9729
        OpenCV_Erode2D_Constant_Padding/10.10.15005
        OpenCV_Opening2D_Constant_Padding/10.20.23076
        OpenCV_Closing2D_Constant_Padding/10.20.23193
        OpenCV_TopHat2D_Constant_Padding/10.30.32760
        OpenCV_BottomHat2D_Constant_Padding/10.30.32736
        OpenCV_MorphGrad2D_Constant_Padding/10.30.32716
        OpenCV_Dilate2D_Constant_Padding/10.10.14963
        \ No newline at end of file diff --git a/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html new file mode 100644 index 00000000..3721e999 --- /dev/null +++ b/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -0,0 +1,36 @@ + + + +

        imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:24:35 UTC

        +

        AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        + + + + + + + + + + + + + + + + + + + + + + +
        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/14.94.9144
        MLIR_Conv2D/17.47.495
        Buddy_Conv2D/10.30.32234
        Buddy_Corr2D_Constant_Padding/10.80.8850
        OpenCV_Filter2D_Constant_Padding/11.31.3546
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.10.14747
        Buddy_Resize2D_Bilinear_Interpolation/10.30.32629
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.00.0102542
        OpenCV_Resize2D_Bilinear_Interpolation/10.00.048907
        Buddy_Erosion2D_Constant_Padding/10.20.23132
        Buddy_Dilation2D_Constant_Padding/10.20.23121
        Buddy_Opening2D_Constant_Padding/10.30.32059
        Buddy_Closing2D_Constant_Padding/10.30.32107
        Buddy_TopHat2D_Constant_Padding/10.90.9725
        Buddy_BottomHat2D_Constant_Padding/10.90.9744
        OpenCV_Erode2D_Constant_Padding/10.10.15040
        OpenCV_Opening2D_Constant_Padding/10.20.23222
        OpenCV_Closing2D_Constant_Padding/10.20.23151
        OpenCV_TopHat2D_Constant_Padding/10.30.32758
        OpenCV_BottomHat2D_Constant_Padding/10.30.32688
        OpenCV_MorphGrad2D_Constant_Padding/10.30.32740
        OpenCV_Dilate2D_Constant_Padding/10.10.14990
        \ No newline at end of file diff --git a/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html new file mode 100644 index 00000000..b3e875ee --- /dev/null +++ b/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -0,0 +1,36 @@ + + + +

        imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-05-26 22:24:35 UTC

        +

        AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        + + + + + + + + + + + + + + + + + + + + + + +
        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/15.05.0140
        MLIR_Conv2D/17.47.495
        Buddy_Conv2D/10.30.32233
        Buddy_Corr2D_Constant_Padding/10.80.8856
        OpenCV_Filter2D_Constant_Padding/11.31.3547
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.10.14751
        Buddy_Resize2D_Bilinear_Interpolation/10.30.32631
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.00.0103135
        OpenCV_Resize2D_Bilinear_Interpolation/10.00.048881
        Buddy_Erosion2D_Constant_Padding/10.20.23158
        Buddy_Dilation2D_Constant_Padding/10.20.23109
        Buddy_Opening2D_Constant_Padding/10.30.32007
        Buddy_Closing2D_Constant_Padding/10.30.32062
        Buddy_TopHat2D_Constant_Padding/10.90.9732
        Buddy_BottomHat2D_Constant_Padding/10.90.9731
        OpenCV_Erode2D_Constant_Padding/10.10.15071
        OpenCV_Opening2D_Constant_Padding/10.20.23192
        OpenCV_Closing2D_Constant_Padding/10.20.23221
        OpenCV_TopHat2D_Constant_Padding/10.30.32767
        OpenCV_BottomHat2D_Constant_Padding/10.30.32752
        OpenCV_MorphGrad2D_Constant_Padding/10.30.32708
        OpenCV_Dilate2D_Constant_Padding/10.10.14910
        \ No newline at end of file diff --git a/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html new file mode 100644 index 00000000..30c37fee --- /dev/null +++ b/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -0,0 +1,36 @@ + + + +

        imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:24:35 UTC

        +

        AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        + + + + + + + + + + + + + + + + + + + + + + +
        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/15.05.0139
        MLIR_Conv2D/17.47.495
        Buddy_Conv2D/10.30.32239
        Buddy_Corr2D_Constant_Padding/10.80.8863
        OpenCV_Filter2D_Constant_Padding/11.31.3548
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.10.14797
        Buddy_Resize2D_Bilinear_Interpolation/10.30.32637
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.00.0103585
        OpenCV_Resize2D_Bilinear_Interpolation/10.00.048856
        Buddy_Erosion2D_Constant_Padding/10.20.23141
        Buddy_Dilation2D_Constant_Padding/10.20.23162
        Buddy_Opening2D_Constant_Padding/10.30.32082
        Buddy_Closing2D_Constant_Padding/10.30.32093
        Buddy_TopHat2D_Constant_Padding/10.90.9727
        Buddy_BottomHat2D_Constant_Padding/10.90.9732
        OpenCV_Erode2D_Constant_Padding/10.10.15002
        OpenCV_Opening2D_Constant_Padding/10.20.23104
        OpenCV_Closing2D_Constant_Padding/10.20.23100
        OpenCV_TopHat2D_Constant_Padding/10.30.32764
        OpenCV_BottomHat2D_Constant_Padding/10.30.32700
        OpenCV_MorphGrad2D_Constant_Padding/10.30.32725
        OpenCV_Dilate2D_Constant_Padding/10.10.14989
        \ No newline at end of file diff --git a/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html new file mode 100644 index 00000000..c4852253 --- /dev/null +++ b/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -0,0 +1,36 @@ + + + +

        imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-05-26 22:24:35 UTC

        +

        AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        + + + + + + + + + + + + + + + + + + + + + + +
        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/15.05.0140
        MLIR_Conv2D/17.47.495
        Buddy_Conv2D/10.30.32227
        Buddy_Corr2D_Constant_Padding/10.80.8857
        OpenCV_Filter2D_Constant_Padding/11.31.3548
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.10.14786
        Buddy_Resize2D_Bilinear_Interpolation/10.30.32621
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.00.0103139
        OpenCV_Resize2D_Bilinear_Interpolation/10.00.048916
        Buddy_Erosion2D_Constant_Padding/10.20.23055
        Buddy_Dilation2D_Constant_Padding/10.30.33073
        Buddy_Opening2D_Constant_Padding/10.40.41855
        Buddy_Closing2D_Constant_Padding/10.40.42033
        Buddy_TopHat2D_Constant_Padding/10.90.9727
        Buddy_BottomHat2D_Constant_Padding/10.90.9732
        OpenCV_Erode2D_Constant_Padding/10.10.15021
        OpenCV_Opening2D_Constant_Padding/10.20.23154
        OpenCV_Closing2D_Constant_Padding/10.20.23178
        OpenCV_TopHat2D_Constant_Padding/10.30.32759
        OpenCV_BottomHat2D_Constant_Padding/10.30.32757
        OpenCV_MorphGrad2D_Constant_Padding/10.30.32734
        OpenCV_Dilate2D_Constant_Padding/10.10.15010
        \ No newline at end of file diff --git a/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html new file mode 100644 index 00000000..9f6f4448 --- /dev/null +++ b/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -0,0 +1,36 @@ + + + +

        imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:24:35 UTC

        +

        AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        + + + + + + + + + + + + + + + + + + + + + + +
        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/111.811.860
        MLIR_Conv2D/129.929.923
        Buddy_Conv2D/11.31.3524
        Buddy_Corr2D_Constant_Padding/12.42.4294
        OpenCV_Filter2D_Constant_Padding/14.24.2166
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.10.14766
        Buddy_Resize2D_Bilinear_Interpolation/10.30.32632
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.00.0102992
        OpenCV_Resize2D_Bilinear_Interpolation/10.00.048914
        Buddy_Erosion2D_Constant_Padding/10.20.23110
        Buddy_Dilation2D_Constant_Padding/10.20.23094
        Buddy_Opening2D_Constant_Padding/10.40.41992
        Buddy_Closing2D_Constant_Padding/10.30.31972
        Buddy_TopHat2D_Constant_Padding/10.90.9731
        Buddy_BottomHat2D_Constant_Padding/10.90.9740
        OpenCV_Erode2D_Constant_Padding/10.10.15054
        OpenCV_Opening2D_Constant_Padding/10.20.23141
        OpenCV_Closing2D_Constant_Padding/10.20.23119
        OpenCV_TopHat2D_Constant_Padding/10.30.32692
        OpenCV_BottomHat2D_Constant_Padding/10.30.32695
        OpenCV_MorphGrad2D_Constant_Padding/10.30.32680
        OpenCV_Dilate2D_Constant_Padding/10.10.15013
        \ No newline at end of file diff --git a/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html new file mode 100644 index 00000000..4fb507cb --- /dev/null +++ b/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -0,0 +1,36 @@ + + + +

        imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-05-26 22:24:35 UTC

        +

        AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        + + + + + + + + + + + + + + + + + + + + + + +
        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/111.811.859
        MLIR_Conv2D/129.929.824
        Buddy_Conv2D/11.31.3542
        Buddy_Corr2D_Constant_Padding/12.42.4293
        OpenCV_Filter2D_Constant_Padding/14.24.2166
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.10.14777
        Buddy_Resize2D_Bilinear_Interpolation/10.30.32635
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.00.0102983
        OpenCV_Resize2D_Bilinear_Interpolation/10.00.048881
        Buddy_Erosion2D_Constant_Padding/10.20.23156
        Buddy_Dilation2D_Constant_Padding/10.20.23159
        Buddy_Opening2D_Constant_Padding/10.30.32113
        Buddy_Closing2D_Constant_Padding/10.30.32087
        Buddy_TopHat2D_Constant_Padding/10.90.9736
        Buddy_BottomHat2D_Constant_Padding/10.90.9740
        OpenCV_Erode2D_Constant_Padding/10.10.15026
        OpenCV_Opening2D_Constant_Padding/10.20.23102
        OpenCV_Closing2D_Constant_Padding/10.20.23074
        OpenCV_TopHat2D_Constant_Padding/10.30.32684
        OpenCV_BottomHat2D_Constant_Padding/10.30.32660
        OpenCV_MorphGrad2D_Constant_Padding/10.30.32685
        OpenCV_Dilate2D_Constant_Padding/10.10.14970
        \ No newline at end of file diff --git a/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html new file mode 100644 index 00000000..730137c2 --- /dev/null +++ b/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -0,0 +1,36 @@ + + + +

        imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:24:35 UTC

        +

        AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        + + + + + + + + + + + + + + + + + + + + + + +
        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/122.222.231
        MLIR_Conv2D/168.168.110
        Buddy_Conv2D/12.32.3300
        Buddy_Corr2D_Constant_Padding/14.74.7148
        OpenCV_Filter2D_Constant_Padding/18.88.879
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.10.14765
        Buddy_Resize2D_Bilinear_Interpolation/10.30.32606
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.00.0103232
        OpenCV_Resize2D_Bilinear_Interpolation/10.00.048866
        Buddy_Erosion2D_Constant_Padding/10.20.23108
        Buddy_Dilation2D_Constant_Padding/10.20.23124
        Buddy_Opening2D_Constant_Padding/10.30.31961
        Buddy_Closing2D_Constant_Padding/10.30.32094
        Buddy_TopHat2D_Constant_Padding/10.90.9764
        Buddy_BottomHat2D_Constant_Padding/10.90.9784
        OpenCV_Erode2D_Constant_Padding/10.10.14984
        OpenCV_Opening2D_Constant_Padding/10.20.23103
        OpenCV_Closing2D_Constant_Padding/10.20.23089
        OpenCV_TopHat2D_Constant_Padding/10.30.32707
        OpenCV_BottomHat2D_Constant_Padding/10.30.32743
        OpenCV_MorphGrad2D_Constant_Padding/10.30.32695
        OpenCV_Dilate2D_Constant_Padding/10.10.14997
        \ No newline at end of file diff --git a/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html new file mode 100644 index 00000000..5ff09924 --- /dev/null +++ b/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -0,0 +1,36 @@ + + + +

        imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-05-26 22:24:35 UTC

        +

        AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        + + + + + + + + + + + + + + + + + + + + + + +
        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/122.022.032
        MLIR_Conv2D/168.168.110
        Buddy_Conv2D/12.22.2304
        Buddy_Corr2D_Constant_Padding/14.74.7149
        OpenCV_Filter2D_Constant_Padding/18.88.879
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.10.14719
        Buddy_Resize2D_Bilinear_Interpolation/10.30.32629
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.00.0102880
        OpenCV_Resize2D_Bilinear_Interpolation/10.00.048902
        Buddy_Erosion2D_Constant_Padding/10.20.23148
        Buddy_Dilation2D_Constant_Padding/10.20.23112
        Buddy_Opening2D_Constant_Padding/10.40.41703
        Buddy_Closing2D_Constant_Padding/10.40.41933
        Buddy_TopHat2D_Constant_Padding/10.90.9724
        Buddy_BottomHat2D_Constant_Padding/10.90.9704
        OpenCV_Erode2D_Constant_Padding/10.10.15045
        OpenCV_Opening2D_Constant_Padding/10.20.23039
        OpenCV_Closing2D_Constant_Padding/10.20.23127
        OpenCV_TopHat2D_Constant_Padding/10.30.32665
        OpenCV_BottomHat2D_Constant_Padding/10.30.32664
        OpenCV_MorphGrad2D_Constant_Padding/10.30.32666
        OpenCV_Dilate2D_Constant_Padding/10.10.14964
        \ No newline at end of file diff --git a/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html new file mode 100644 index 00000000..4d6ec707 --- /dev/null +++ b/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -0,0 +1,36 @@ + + + +

        imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:24:35 UTC

        +

        AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        + + + + + + + + + + + + + + + + + + + + + + +
        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/135.235.220
        MLIR_Conv2D/1122.1122.16
        Buddy_Conv2D/14.24.2167
        Buddy_Corr2D_Constant_Padding/17.97.989
        OpenCV_Filter2D_Constant_Padding/15.95.9118
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.10.14760
        Buddy_Resize2D_Bilinear_Interpolation/10.30.32636
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.00.0103065
        OpenCV_Resize2D_Bilinear_Interpolation/10.00.047744
        Buddy_Erosion2D_Constant_Padding/10.20.23166
        Buddy_Dilation2D_Constant_Padding/10.20.23164
        Buddy_Opening2D_Constant_Padding/10.30.32083
        Buddy_Closing2D_Constant_Padding/10.30.32139
        Buddy_TopHat2D_Constant_Padding/10.90.9739
        Buddy_BottomHat2D_Constant_Padding/10.90.9755
        OpenCV_Erode2D_Constant_Padding/10.10.15029
        OpenCV_Opening2D_Constant_Padding/10.20.23140
        OpenCV_Closing2D_Constant_Padding/10.20.23204
        OpenCV_TopHat2D_Constant_Padding/10.30.32744
        OpenCV_BottomHat2D_Constant_Padding/10.30.32737
        OpenCV_MorphGrad2D_Constant_Padding/10.30.32177
        OpenCV_Dilate2D_Constant_Padding/10.10.15097
        \ No newline at end of file diff --git a/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html new file mode 100644 index 00000000..fb5643ed --- /dev/null +++ b/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -0,0 +1,36 @@ + + + +

        imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-05-26 22:24:35 UTC

        +

        AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        + + + + + + + + + + + + + + + + + + + + + + +
        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/134.834.820
        MLIR_Conv2D/1122.0122.06
        Buddy_Conv2D/13.93.9179
        Buddy_Corr2D_Constant_Padding/17.97.989
        OpenCV_Filter2D_Constant_Padding/15.95.9118
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.10.14765
        Buddy_Resize2D_Bilinear_Interpolation/10.30.32629
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.00.0102844
        OpenCV_Resize2D_Bilinear_Interpolation/10.00.048492
        Buddy_Erosion2D_Constant_Padding/10.20.23127
        Buddy_Dilation2D_Constant_Padding/10.20.23155
        Buddy_Opening2D_Constant_Padding/10.30.32000
        Buddy_Closing2D_Constant_Padding/10.40.42036
        Buddy_TopHat2D_Constant_Padding/10.90.9751
        Buddy_BottomHat2D_Constant_Padding/10.90.9755
        OpenCV_Erode2D_Constant_Padding/10.10.15001
        OpenCV_Opening2D_Constant_Padding/10.20.23193
        OpenCV_Closing2D_Constant_Padding/10.20.23193
        OpenCV_TopHat2D_Constant_Padding/10.30.32734
        OpenCV_BottomHat2D_Constant_Padding/10.30.32725
        OpenCV_MorphGrad2D_Constant_Padding/10.30.32723
        OpenCV_Dilate2D_Constant_Padding/10.10.15067
        \ No newline at end of file diff --git a/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html new file mode 100644 index 00000000..524fe758 --- /dev/null +++ b/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -0,0 +1,36 @@ + + + +

        imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:24:35 UTC

        +

        SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        + + + + + + + + + + + + + + + + + + + + + + +
        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/15.05.0139
        MLIR_Conv2D/17.47.494
        Buddy_Conv2D/10.70.71001
        Buddy_Corr2D_Constant_Padding/11.11.1645
        OpenCV_Filter2D_Constant_Padding/11.91.9367
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.10.14717
        Buddy_Resize2D_Bilinear_Interpolation/10.30.32633
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.00.0103033
        OpenCV_Resize2D_Bilinear_Interpolation/10.00.048825
        Buddy_Erosion2D_Constant_Padding/10.20.23139
        Buddy_Dilation2D_Constant_Padding/10.20.23065
        Buddy_Opening2D_Constant_Padding/10.40.41920
        Buddy_Closing2D_Constant_Padding/10.40.41938
        Buddy_TopHat2D_Constant_Padding/10.90.9725
        Buddy_BottomHat2D_Constant_Padding/10.90.9739
        OpenCV_Erode2D_Constant_Padding/10.20.24653
        OpenCV_Opening2D_Constant_Padding/10.20.23211
        OpenCV_Closing2D_Constant_Padding/10.20.23106
        OpenCV_TopHat2D_Constant_Padding/10.30.32694
        OpenCV_BottomHat2D_Constant_Padding/10.30.32735
        OpenCV_MorphGrad2D_Constant_Padding/10.30.32736
        OpenCV_Dilate2D_Constant_Padding/10.10.14980
        \ No newline at end of file diff --git a/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html new file mode 100644 index 00000000..f7e1cb5e --- /dev/null +++ b/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -0,0 +1,36 @@ + + + +

        imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-05-26 22:24:35 UTC

        +

        SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        + + + + + + + + + + + + + + + + + + + + + + +
        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/15.05.0140
        MLIR_Conv2D/17.47.495
        Buddy_Conv2D/10.70.7962
        Buddy_Corr2D_Constant_Padding/11.11.1652
        OpenCV_Filter2D_Constant_Padding/11.91.9366
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.10.14698
        Buddy_Resize2D_Bilinear_Interpolation/10.30.32627
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.00.0102351
        OpenCV_Resize2D_Bilinear_Interpolation/10.00.048814
        Buddy_Erosion2D_Constant_Padding/10.20.23146
        Buddy_Dilation2D_Constant_Padding/10.20.23150
        Buddy_Opening2D_Constant_Padding/10.30.32128
        Buddy_Closing2D_Constant_Padding/10.30.32167
        Buddy_TopHat2D_Constant_Padding/10.90.9747
        Buddy_BottomHat2D_Constant_Padding/10.90.9769
        OpenCV_Erode2D_Constant_Padding/10.10.15009
        OpenCV_Opening2D_Constant_Padding/10.20.23227
        OpenCV_Closing2D_Constant_Padding/10.20.23120
        OpenCV_TopHat2D_Constant_Padding/10.30.32745
        OpenCV_BottomHat2D_Constant_Padding/10.30.32765
        OpenCV_MorphGrad2D_Constant_Padding/10.30.32733
        OpenCV_Dilate2D_Constant_Padding/10.10.14956
        \ No newline at end of file diff --git a/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html new file mode 100644 index 00000000..9dbe1bab --- /dev/null +++ b/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -0,0 +1,36 @@ + + + +

        imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:24:35 UTC

        +

        SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        + + + + + + + + + + + + + + + + + + + + + + +
        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/111.911.960
        MLIR_Conv2D/129.829.823
        Buddy_Conv2D/12.12.1332
        Buddy_Corr2D_Constant_Padding/11.81.8390
        OpenCV_Filter2D_Constant_Padding/12.72.7256
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.10.14768
        Buddy_Resize2D_Bilinear_Interpolation/10.30.32629
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.00.0103262
        OpenCV_Resize2D_Bilinear_Interpolation/10.00.048878
        Buddy_Erosion2D_Constant_Padding/10.20.23105
        Buddy_Dilation2D_Constant_Padding/10.20.23120
        Buddy_Opening2D_Constant_Padding/10.40.41986
        Buddy_Closing2D_Constant_Padding/10.30.31944
        Buddy_TopHat2D_Constant_Padding/10.90.9747
        Buddy_BottomHat2D_Constant_Padding/10.90.9739
        OpenCV_Erode2D_Constant_Padding/10.10.14963
        OpenCV_Opening2D_Constant_Padding/10.20.23142
        OpenCV_Closing2D_Constant_Padding/10.20.23089
        OpenCV_TopHat2D_Constant_Padding/10.30.32689
        OpenCV_BottomHat2D_Constant_Padding/10.30.32693
        OpenCV_MorphGrad2D_Constant_Padding/10.30.32664
        OpenCV_Dilate2D_Constant_Padding/10.10.15045
        \ No newline at end of file diff --git a/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html new file mode 100644 index 00000000..be961174 --- /dev/null +++ b/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -0,0 +1,36 @@ + + + +

        imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-05-26 22:24:35 UTC

        +

        SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        + + + + + + + + + + + + + + + + + + + + + + +
        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/111.611.660
        MLIR_Conv2D/129.929.923
        Buddy_Conv2D/12.12.1327
        Buddy_Corr2D_Constant_Padding/11.81.8389
        OpenCV_Filter2D_Constant_Padding/12.72.7255
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.10.14755
        Buddy_Resize2D_Bilinear_Interpolation/10.30.32633
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.00.0102709
        OpenCV_Resize2D_Bilinear_Interpolation/10.00.048854
        Buddy_Erosion2D_Constant_Padding/10.20.22456
        Buddy_Dilation2D_Constant_Padding/10.20.23097
        Buddy_Opening2D_Constant_Padding/10.40.42015
        Buddy_Closing2D_Constant_Padding/10.30.32002
        Buddy_TopHat2D_Constant_Padding/10.90.9721
        Buddy_BottomHat2D_Constant_Padding/10.90.9727
        OpenCV_Erode2D_Constant_Padding/10.10.14988
        OpenCV_Opening2D_Constant_Padding/10.20.23142
        OpenCV_Closing2D_Constant_Padding/10.20.23180
        OpenCV_TopHat2D_Constant_Padding/10.30.32711
        OpenCV_BottomHat2D_Constant_Padding/10.30.32642
        OpenCV_MorphGrad2D_Constant_Padding/10.30.32712
        OpenCV_Dilate2D_Constant_Padding/10.10.15064
        \ No newline at end of file diff --git a/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html new file mode 100644 index 00000000..4660feda --- /dev/null +++ b/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -0,0 +1,36 @@ + + + +

        imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:24:35 UTC

        +

        SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        + + + + + + + + + + + + + + + + + + + + + + +
        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/15.05.0140
        MLIR_Conv2D/17.47.495
        Buddy_Conv2D/10.40.41585
        Buddy_Corr2D_Constant_Padding/10.80.8879
        OpenCV_Filter2D_Constant_Padding/11.31.3549
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.10.14708
        Buddy_Resize2D_Bilinear_Interpolation/10.30.32618
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.00.0102964
        OpenCV_Resize2D_Bilinear_Interpolation/10.00.048907
        Buddy_Erosion2D_Constant_Padding/10.20.23019
        Buddy_Dilation2D_Constant_Padding/10.20.23004
        Buddy_Opening2D_Constant_Padding/10.30.32064
        Buddy_Closing2D_Constant_Padding/10.30.32044
        Buddy_TopHat2D_Constant_Padding/10.90.9782
        Buddy_BottomHat2D_Constant_Padding/10.90.9791
        OpenCV_Erode2D_Constant_Padding/10.10.15012
        OpenCV_Opening2D_Constant_Padding/10.20.23259
        OpenCV_Closing2D_Constant_Padding/10.20.23254
        OpenCV_TopHat2D_Constant_Padding/10.20.22816
        OpenCV_BottomHat2D_Constant_Padding/10.20.22807
        OpenCV_MorphGrad2D_Constant_Padding/10.30.32604
        OpenCV_Dilate2D_Constant_Padding/10.10.14985
        \ No newline at end of file diff --git a/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html new file mode 100644 index 00000000..e151144d --- /dev/null +++ b/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -0,0 +1,36 @@ + + + +

        imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-05-26 22:24:35 UTC

        +

        SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        + + + + + + + + + + + + + + + + + + + + + + +
        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/14.94.9144
        MLIR_Conv2D/17.47.495
        Buddy_Conv2D/10.40.41587
        Buddy_Corr2D_Constant_Padding/10.80.8850
        OpenCV_Filter2D_Constant_Padding/11.31.3549
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.10.14759
        Buddy_Resize2D_Bilinear_Interpolation/10.30.32628
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.00.0102952
        OpenCV_Resize2D_Bilinear_Interpolation/10.00.048945
        Buddy_Erosion2D_Constant_Padding/10.20.23245
        Buddy_Dilation2D_Constant_Padding/10.20.22941
        Buddy_Opening2D_Constant_Padding/10.40.41920
        Buddy_Closing2D_Constant_Padding/10.40.41942
        Buddy_TopHat2D_Constant_Padding/11.01.0686
        Buddy_BottomHat2D_Constant_Padding/11.01.0687
        OpenCV_Erode2D_Constant_Padding/10.10.14813
        OpenCV_Opening2D_Constant_Padding/10.20.23185
        OpenCV_Closing2D_Constant_Padding/10.20.23185
        OpenCV_TopHat2D_Constant_Padding/10.30.32747
        OpenCV_BottomHat2D_Constant_Padding/10.30.32760
        OpenCV_MorphGrad2D_Constant_Padding/10.30.32735
        OpenCV_Dilate2D_Constant_Padding/10.10.14913
        \ No newline at end of file diff --git a/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html new file mode 100644 index 00000000..b288e88f --- /dev/null +++ b/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -0,0 +1,36 @@ + + + +

        imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:24:35 UTC

        +

        SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        + + + + + + + + + + + + + + + + + + + + + + +
        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/14.94.9143
        MLIR_Conv2D/17.47.495
        Buddy_Conv2D/10.50.51300
        Buddy_Corr2D_Constant_Padding/10.80.8860
        OpenCV_Filter2D_Constant_Padding/11.31.3548
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.10.14746
        Buddy_Resize2D_Bilinear_Interpolation/10.30.32630
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.00.0102577
        OpenCV_Resize2D_Bilinear_Interpolation/10.00.048983
        Buddy_Erosion2D_Constant_Padding/10.20.23160
        Buddy_Dilation2D_Constant_Padding/10.20.23155
        Buddy_Opening2D_Constant_Padding/10.40.42114
        Buddy_Closing2D_Constant_Padding/10.40.41704
        Buddy_TopHat2D_Constant_Padding/11.01.0677
        Buddy_BottomHat2D_Constant_Padding/11.01.0684
        OpenCV_Erode2D_Constant_Padding/10.10.15055
        OpenCV_Opening2D_Constant_Padding/10.20.23286
        OpenCV_Closing2D_Constant_Padding/10.20.23307
        OpenCV_TopHat2D_Constant_Padding/10.20.22840
        OpenCV_BottomHat2D_Constant_Padding/10.20.22838
        OpenCV_MorphGrad2D_Constant_Padding/10.30.32741
        OpenCV_Dilate2D_Constant_Padding/10.10.15019
        \ No newline at end of file diff --git a/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html new file mode 100644 index 00000000..90c4b26b --- /dev/null +++ b/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -0,0 +1,36 @@ + + + +

        imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-05-26 22:24:35 UTC

        +

        SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        + + + + + + + + + + + + + + + + + + + + + + +
        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/14.94.9143
        MLIR_Conv2D/17.47.495
        Buddy_Conv2D/10.50.51375
        Buddy_Corr2D_Constant_Padding/10.80.8857
        OpenCV_Filter2D_Constant_Padding/11.31.3548
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.10.14712
        Buddy_Resize2D_Bilinear_Interpolation/10.30.32626
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.00.0103057
        OpenCV_Resize2D_Bilinear_Interpolation/10.00.048615
        Buddy_Erosion2D_Constant_Padding/10.20.23174
        Buddy_Dilation2D_Constant_Padding/10.20.23168
        Buddy_Opening2D_Constant_Padding/10.30.32081
        Buddy_Closing2D_Constant_Padding/10.30.32123
        Buddy_TopHat2D_Constant_Padding/10.90.9787
        Buddy_BottomHat2D_Constant_Padding/10.90.9762
        OpenCV_Erode2D_Constant_Padding/10.10.15037
        OpenCV_Opening2D_Constant_Padding/10.20.23118
        OpenCV_Closing2D_Constant_Padding/10.20.23165
        OpenCV_TopHat2D_Constant_Padding/10.30.32680
        OpenCV_BottomHat2D_Constant_Padding/10.30.32756
        OpenCV_MorphGrad2D_Constant_Padding/10.30.32733
        OpenCV_Dilate2D_Constant_Padding/10.10.14846
        \ No newline at end of file diff --git a/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html new file mode 100644 index 00000000..f9868cf6 --- /dev/null +++ b/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -0,0 +1,36 @@ + + + +

        imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:24:35 UTC

        +

        SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        + + + + + + + + + + + + + + + + + + + + + + +
        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/111.811.859
        MLIR_Conv2D/129.929.924
        Buddy_Conv2D/13.13.1225
        Buddy_Corr2D_Constant_Padding/12.42.4299
        OpenCV_Filter2D_Constant_Padding/14.24.2167
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.10.14718
        Buddy_Resize2D_Bilinear_Interpolation/10.30.32624
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.00.0102278
        OpenCV_Resize2D_Bilinear_Interpolation/10.00.047123
        Buddy_Erosion2D_Constant_Padding/10.20.23147
        Buddy_Dilation2D_Constant_Padding/10.20.23121
        Buddy_Opening2D_Constant_Padding/10.30.32029
        Buddy_Closing2D_Constant_Padding/10.30.32088
        Buddy_TopHat2D_Constant_Padding/10.90.9755
        Buddy_BottomHat2D_Constant_Padding/10.90.9740
        OpenCV_Erode2D_Constant_Padding/10.10.15077
        OpenCV_Opening2D_Constant_Padding/10.20.23073
        OpenCV_Closing2D_Constant_Padding/10.20.23064
        OpenCV_TopHat2D_Constant_Padding/10.30.32696
        OpenCV_BottomHat2D_Constant_Padding/10.30.32699
        OpenCV_MorphGrad2D_Constant_Padding/10.30.32689
        OpenCV_Dilate2D_Constant_Padding/10.10.15007
        \ No newline at end of file diff --git a/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html new file mode 100644 index 00000000..bddd4186 --- /dev/null +++ b/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -0,0 +1,36 @@ + + + +

        imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-05-26 22:24:35 UTC

        +

        SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        + + + + + + + + + + + + + + + + + + + + + + +
        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/111.811.859
        MLIR_Conv2D/129.829.823
        Buddy_Conv2D/13.13.1225
        Buddy_Corr2D_Constant_Padding/12.42.4297
        OpenCV_Filter2D_Constant_Padding/14.24.2165
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.10.14749
        Buddy_Resize2D_Bilinear_Interpolation/10.30.32628
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.00.0102987
        OpenCV_Resize2D_Bilinear_Interpolation/10.00.048924
        Buddy_Erosion2D_Constant_Padding/10.20.23176
        Buddy_Dilation2D_Constant_Padding/10.20.23152
        Buddy_Opening2D_Constant_Padding/10.40.41998
        Buddy_Closing2D_Constant_Padding/10.30.31943
        Buddy_TopHat2D_Constant_Padding/10.90.9750
        Buddy_BottomHat2D_Constant_Padding/10.90.9737
        OpenCV_Erode2D_Constant_Padding/10.10.15030
        OpenCV_Opening2D_Constant_Padding/10.20.23017
        OpenCV_Closing2D_Constant_Padding/10.20.23170
        OpenCV_TopHat2D_Constant_Padding/10.30.32657
        OpenCV_BottomHat2D_Constant_Padding/10.30.32690
        OpenCV_MorphGrad2D_Constant_Padding/10.30.32671
        OpenCV_Dilate2D_Constant_Padding/10.10.14878
        \ No newline at end of file diff --git a/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html new file mode 100644 index 00000000..6bd0958e --- /dev/null +++ b/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -0,0 +1,36 @@ + + + +

        imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:24:35 UTC

        +

        SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        + + + + + + + + + + + + + + + + + + + + + + +
        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/122.022.032
        MLIR_Conv2D/168.168.110
        Buddy_Conv2D/16.36.3109
        Buddy_Corr2D_Constant_Padding/14.74.7148
        OpenCV_Filter2D_Constant_Padding/18.88.879
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.10.14771
        Buddy_Resize2D_Bilinear_Interpolation/10.30.32626
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.00.0103087
        OpenCV_Resize2D_Bilinear_Interpolation/10.00.048675
        Buddy_Erosion2D_Constant_Padding/10.20.23188
        Buddy_Dilation2D_Constant_Padding/10.20.23182
        Buddy_Opening2D_Constant_Padding/10.30.32146
        Buddy_Closing2D_Constant_Padding/10.40.42142
        Buddy_TopHat2D_Constant_Padding/10.90.9745
        Buddy_BottomHat2D_Constant_Padding/10.90.9722
        OpenCV_Erode2D_Constant_Padding/10.10.15046
        OpenCV_Opening2D_Constant_Padding/10.20.23129
        OpenCV_Closing2D_Constant_Padding/10.20.23078
        OpenCV_TopHat2D_Constant_Padding/10.30.32660
        OpenCV_BottomHat2D_Constant_Padding/10.30.32723
        OpenCV_MorphGrad2D_Constant_Padding/10.30.32685
        OpenCV_Dilate2D_Constant_Padding/10.10.14978
        \ No newline at end of file diff --git a/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html new file mode 100644 index 00000000..ffb15e51 --- /dev/null +++ b/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -0,0 +1,36 @@ + + + +

        imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-05-26 22:24:35 UTC

        +

        SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        + + + + + + + + + + + + + + + + + + + + + + +
        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/122.122.132
        MLIR_Conv2D/168.268.210
        Buddy_Conv2D/16.46.4110
        Buddy_Corr2D_Constant_Padding/14.74.7149
        OpenCV_Filter2D_Constant_Padding/18.88.880
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.10.14781
        Buddy_Resize2D_Bilinear_Interpolation/10.30.32627
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.00.0102862
        OpenCV_Resize2D_Bilinear_Interpolation/10.00.048877
        Buddy_Erosion2D_Constant_Padding/10.20.23093
        Buddy_Dilation2D_Constant_Padding/10.20.23107
        Buddy_Opening2D_Constant_Padding/10.30.32034
        Buddy_Closing2D_Constant_Padding/10.30.31989
        Buddy_TopHat2D_Constant_Padding/10.90.9754
        Buddy_BottomHat2D_Constant_Padding/10.90.9736
        OpenCV_Erode2D_Constant_Padding/10.10.15028
        OpenCV_Opening2D_Constant_Padding/10.20.23031
        OpenCV_Closing2D_Constant_Padding/10.20.23131
        OpenCV_TopHat2D_Constant_Padding/10.30.32697
        OpenCV_BottomHat2D_Constant_Padding/10.30.32706
        OpenCV_MorphGrad2D_Constant_Padding/10.30.32674
        OpenCV_Dilate2D_Constant_Padding/10.20.24993
        \ No newline at end of file diff --git a/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html new file mode 100644 index 00000000..16d450da --- /dev/null +++ b/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -0,0 +1,36 @@ + + + +

        imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:24:35 UTC

        +

        SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        + + + + + + + + + + + + + + + + + + + + + + +
        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/135.235.220
        MLIR_Conv2D/1121.2121.26
        Buddy_Conv2D/110.810.864
        Buddy_Corr2D_Constant_Padding/17.97.989
        OpenCV_Filter2D_Constant_Padding/15.95.9118
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.10.14772
        Buddy_Resize2D_Bilinear_Interpolation/10.30.32626
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.00.0102812
        OpenCV_Resize2D_Bilinear_Interpolation/10.00.048301
        Buddy_Erosion2D_Constant_Padding/10.20.23183
        Buddy_Dilation2D_Constant_Padding/10.20.23163
        Buddy_Opening2D_Constant_Padding/10.30.32017
        Buddy_Closing2D_Constant_Padding/10.40.42024
        Buddy_TopHat2D_Constant_Padding/10.90.9749
        Buddy_BottomHat2D_Constant_Padding/10.90.9730
        OpenCV_Erode2D_Constant_Padding/10.10.15038
        OpenCV_Opening2D_Constant_Padding/10.20.23212
        OpenCV_Closing2D_Constant_Padding/10.20.23171
        OpenCV_TopHat2D_Constant_Padding/10.30.32701
        OpenCV_BottomHat2D_Constant_Padding/10.30.32755
        OpenCV_MorphGrad2D_Constant_Padding/10.30.32727
        OpenCV_Dilate2D_Constant_Padding/10.10.15053
        \ No newline at end of file diff --git a/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html new file mode 100644 index 00000000..1839d422 --- /dev/null +++ b/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -0,0 +1,36 @@ + + + +

        imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-05-26 22:24:35 UTC

        +

        SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        + + + + + + + + + + + + + + + + + + + + + + +
        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/135.335.320
        MLIR_Conv2D/1121.0121.06
        Buddy_Conv2D/110.910.964
        Buddy_Corr2D_Constant_Padding/17.97.988
        OpenCV_Filter2D_Constant_Padding/15.95.9118
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.10.14703
        Buddy_Resize2D_Bilinear_Interpolation/10.30.32264
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.00.0103210
        OpenCV_Resize2D_Bilinear_Interpolation/10.00.048545
        Buddy_Erosion2D_Constant_Padding/10.20.23132
        Buddy_Dilation2D_Constant_Padding/10.20.23160
        Buddy_Opening2D_Constant_Padding/10.30.32027
        Buddy_Closing2D_Constant_Padding/10.40.42017
        Buddy_TopHat2D_Constant_Padding/10.90.9750
        Buddy_BottomHat2D_Constant_Padding/10.90.9761
        OpenCV_Erode2D_Constant_Padding/10.10.15027
        OpenCV_Opening2D_Constant_Padding/10.20.23038
        OpenCV_Closing2D_Constant_Padding/10.20.23072
        OpenCV_TopHat2D_Constant_Padding/10.30.32693
        OpenCV_BottomHat2D_Constant_Padding/10.30.32663
        OpenCV_MorphGrad2D_Constant_Padding/10.30.32707
        OpenCV_Dilate2D_Constant_Padding/10.10.15003
        \ No newline at end of file diff --git a/site/index.html b/site/index.html deleted file mode 100644 index 53741311..00000000 --- a/site/index.html +++ /dev/null @@ -1,39 +0,0 @@ - - -

        Buddy-Benchmark results

        \ No newline at end of file diff --git a/site/vectorization/vectorization_matrix.html b/site/vectorization/vectorization_matrix.html index 2e73bd2f..2d1fc70a 100644 --- a/site/vectorization/vectorization_matrix.html +++ b/site/vectorization/vectorization_matrix.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        vectorization/vectorization_matrix.json

        2025-05-26 21:43:11 UTC

        +

        vectorization/vectorization_matrix.json

        2025-05-26 22:24:35 UTC

        vectorization_matrix.json

        diff --git a/test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json new file mode 100644 index 00000000..0daff6be --- /dev/null +++ b/test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json @@ -0,0 +1,348 @@ +{ + "context": { + "date": "2025-05-26T22:23:17+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./bin/image-processing-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [2.00293,4.94727,7.33203], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "Eigen_Convolve2D/1", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Eigen_Convolve2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 144, + "real_time": 4.8616694079505072e+00, + "cpu_time": 4.8615443888888876e+00, + "time_unit": "ms" + }, + { + "name": "MLIR_Conv2D/1", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "MLIR_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 95, + "real_time": 7.3721156308525488e+00, + "cpu_time": 7.3717356526315809e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Conv2D/1", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "Buddy_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1663, + "real_time": 
4.2029142590330371e-01, + "cpu_time": 4.2028052796151533e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Corr2D_Constant_Padding/1", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "Buddy_Corr2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 651, + "real_time": 1.0759140208889995e+00, + "cpu_time": 1.0758246789554526e+00, + "time_unit": "ms" + }, + { + "name": "OpenCV_Filter2D_Constant_Padding/1", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "OpenCV_Filter2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 367, + "real_time": 1.9042591896791223e+00, + "cpu_time": 1.9041629400544959e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 5, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4748, + "real_time": 1.4552543145347865e-01, + "cpu_time": 1.4551487215669751e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2632, + "real_time": 2.6613064492432725e-01, + "cpu_time": 2.6612582446808503e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 7, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 102984, + "real_time": 6.7871305550391663e-03, + "cpu_time": 6.7868588518604838e-03, + "time_unit": "ms" 
+ }, + { + "name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "family_index": 8, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 48927, + "real_time": 1.4295738095923072e-02, + "cpu_time": 1.4295068591984003e-02, + "time_unit": "ms" + }, + { + "name": "Buddy_Erosion2D_Constant_Padding/1", + "family_index": 9, + "per_family_instance_index": 0, + "run_name": "Buddy_Erosion2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3155, + "real_time": 2.2388931287071784e-01, + "cpu_time": 2.2387921521394577e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Dilation2D_Constant_Padding/1", + "family_index": 10, + "per_family_instance_index": 0, + "run_name": "Buddy_Dilation2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3149, + "real_time": 2.2184868068420990e-01, + "cpu_time": 2.2184349539536333e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Opening2D_Constant_Padding/1", + "family_index": 11, + "per_family_instance_index": 0, + "run_name": "Buddy_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2068, + "real_time": 3.5998106139713837e-01, + "cpu_time": 3.5995235735009629e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Closing2D_Constant_Padding/1", + "family_index": 12, + "per_family_instance_index": 0, + "run_name": "Buddy_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1957, + "real_time": 3.5581509042559806e-01, + "cpu_time": 3.5579569596320909e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_TopHat2D_Constant_Padding/1", + "family_index": 13, + "per_family_instance_index": 0, + "run_name": 
"Buddy_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 731, + "real_time": 9.3642933097975034e-01, + "cpu_time": 9.3637127633378980e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_BottomHat2D_Constant_Padding/1", + "family_index": 14, + "per_family_instance_index": 0, + "run_name": "Buddy_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 729, + "real_time": 9.1944250121038151e-01, + "cpu_time": 9.1939183127572155e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Erode2D_Constant_Padding/1", + "family_index": 15, + "per_family_instance_index": 0, + "run_name": "OpenCV_Erode2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 5005, + "real_time": 1.3918872033203994e-01, + "cpu_time": 1.3918274625374638e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Opening2D_Constant_Padding/1", + "family_index": 16, + "per_family_instance_index": 0, + "run_name": "OpenCV_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3076, + "real_time": 2.2821756435851281e-01, + "cpu_time": 2.2820870481144373e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Closing2D_Constant_Padding/1", + "family_index": 17, + "per_family_instance_index": 0, + "run_name": "OpenCV_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3193, + "real_time": 2.2276521902770560e-01, + "cpu_time": 2.2275964672721563e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_TopHat2D_Constant_Padding/1", + "family_index": 18, + "per_family_instance_index": 0, + "run_name": "OpenCV_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + 
"iterations": 2760, + "real_time": 2.5519946441594243e-01, + "cpu_time": 2.5519398152173933e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_BottomHat2D_Constant_Padding/1", + "family_index": 19, + "per_family_instance_index": 0, + "run_name": "OpenCV_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2736, + "real_time": 2.5641642666111392e-01, + "cpu_time": 2.5640509210526247e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "family_index": 20, + "per_family_instance_index": 0, + "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2716, + "real_time": 2.5645334311981793e-01, + "cpu_time": 2.5644544771723121e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Dilate2D_Constant_Padding/1", + "family_index": 21, + "per_family_instance_index": 0, + "run_name": "OpenCV_Dilate2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4963, + "real_time": 1.4073776507927674e-01, + "cpu_time": 1.4073016078984532e-01, + "time_unit": "ms" + } + ] +} diff --git a/test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json new file mode 100644 index 00000000..99961cec --- /dev/null +++ b/test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json @@ -0,0 +1,80 @@ +{ + "context": { + "date": "2025-05-26T22:23:41+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./bin/image-processing-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": 
"Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [2.00049,4.70947,7.18896], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "Eigen_Convolve2D/1", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Eigen_Convolve2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 143, + "real_time": 4.8717384884407471e+00, + "cpu_time": 4.8714335384615381e+00, + "time_unit": "ms" + }, + { + "name": "MLIR_Conv2D/1", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "MLIR_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 95, + "real_time": 7.3674740956017848e+00, + "cpu_time": 7.3670748842105276e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Conv2D/1", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "Buddy_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1657, + "real_time": 4.2453381037884758e-01, + "cpu_time": 4.2450378636089325e-01, + "time_unit": "ms" + } \ No newline at end of file diff --git a/test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json new file mode 100644 index 00000000..a4651f0b --- /dev/null +++ b/test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json @@ -0,0 +1,348 @@ +{ + "context": { + "date": "2025-05-26T22:23:45+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./bin/image-processing-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": 
"Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [2,4.66406,7.16064], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "Eigen_Convolve2D/1", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Eigen_Convolve2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 59, + "real_time": 1.1827939521458189e+01, + "cpu_time": 1.1827101322033899e+01, + "time_unit": "ms" + }, + { + "name": "MLIR_Conv2D/1", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "MLIR_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 24, + "real_time": 2.9902152329062421e+01, + "cpu_time": 2.9900978541666660e+01, + "time_unit": "ms" + }, + { + "name": "Buddy_Conv2D/1", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "Buddy_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 751, + "real_time": 1.0176254473577009e+00, + "cpu_time": 1.0176059733688418e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Corr2D_Constant_Padding/1", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "Buddy_Corr2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 391, + "real_time": 1.7920395364160733e+00, + "cpu_time": 1.7919525166240420e+00, + "time_unit": "ms" + }, + { + "name": "OpenCV_Filter2D_Constant_Padding/1", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "OpenCV_Filter2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + 
"threads": 1, + "iterations": 255, + "real_time": 2.7389568660189125e+00, + "cpu_time": 2.7388458392156876e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 5, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4778, + "real_time": 1.4560087359802351e-01, + "cpu_time": 1.4559084554206772e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2630, + "real_time": 2.6603678361771227e-01, + "cpu_time": 2.6602515285171091e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 7, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 103033, + "real_time": 6.7959410668000245e-03, + "cpu_time": 6.7955963623305181e-03, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "family_index": 8, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 48860, + "real_time": 1.6051803029710720e-02, + "cpu_time": 1.6050967110110514e-02, + "time_unit": "ms" + }, + { + "name": "Buddy_Erosion2D_Constant_Padding/1", + "family_index": 9, + "per_family_instance_index": 0, + "run_name": "Buddy_Erosion2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3134, + "real_time": 
2.2470747996133530e-01, + "cpu_time": 2.2469278525845587e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Dilation2D_Constant_Padding/1", + "family_index": 10, + "per_family_instance_index": 0, + "run_name": "Buddy_Dilation2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2971, + "real_time": 2.2581702669394693e-01, + "cpu_time": 2.2580936755301245e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Opening2D_Constant_Padding/1", + "family_index": 11, + "per_family_instance_index": 0, + "run_name": "Buddy_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1881, + "real_time": 3.4690140719302215e-01, + "cpu_time": 3.4687723391812830e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Closing2D_Constant_Padding/1", + "family_index": 12, + "per_family_instance_index": 0, + "run_name": "Buddy_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2009, + "real_time": 3.4828037635463577e-01, + "cpu_time": 3.4826756097560951e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_TopHat2D_Constant_Padding/1", + "family_index": 13, + "per_family_instance_index": 0, + "run_name": "Buddy_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 714, + "real_time": 9.2675730179087457e-01, + "cpu_time": 9.2670533893557483e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_BottomHat2D_Constant_Padding/1", + "family_index": 14, + "per_family_instance_index": 0, + "run_name": "Buddy_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 723, + "real_time": 9.3064499171360560e-01, + "cpu_time": 9.3059483955739986e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Erode2D_Constant_Padding/1", 
+ "family_index": 15, + "per_family_instance_index": 0, + "run_name": "OpenCV_Erode2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4965, + "real_time": 1.4058820535649225e-01, + "cpu_time": 1.4057882396777469e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Opening2D_Constant_Padding/1", + "family_index": 16, + "per_family_instance_index": 0, + "run_name": "OpenCV_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3088, + "real_time": 2.2168861282265556e-01, + "cpu_time": 2.2166768620466271e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Closing2D_Constant_Padding/1", + "family_index": 17, + "per_family_instance_index": 0, + "run_name": "OpenCV_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3129, + "real_time": 2.2427948622152652e-01, + "cpu_time": 2.2426256631511651e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_TopHat2D_Constant_Padding/1", + "family_index": 18, + "per_family_instance_index": 0, + "run_name": "OpenCV_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2625, + "real_time": 2.6387358208497363e-01, + "cpu_time": 2.6386711885714254e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_BottomHat2D_Constant_Padding/1", + "family_index": 19, + "per_family_instance_index": 0, + "run_name": "OpenCV_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2625, + "real_time": 2.6865486658754800e-01, + "cpu_time": 2.6863586247619137e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "family_index": 20, + "per_family_instance_index": 0, + "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "run_type": 
"iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2692, + "real_time": 2.5986144485237161e-01, + "cpu_time": 2.5984780089153114e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Dilate2D_Constant_Padding/1", + "family_index": 21, + "per_family_instance_index": 0, + "run_name": "OpenCV_Dilate2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 5053, + "real_time": 1.3817052432281909e-01, + "cpu_time": 1.3816524757569701e-01, + "time_unit": "ms" + } + ] +} diff --git a/test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json new file mode 100644 index 00000000..3b942a13 --- /dev/null +++ b/test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json @@ -0,0 +1,348 @@ +{ + "context": { + "date": "2025-05-26T22:24:09+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./bin/image-processing-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [2.06152,4.46436,7.02783], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "Eigen_Convolve2D/1", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Eigen_Convolve2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 60, + "real_time": 1.1859718554963669e+01, + "cpu_time": 1.1859415466666666e+01, + "time_unit": "ms" + }, + { + "name": "MLIR_Conv2D/1", + 
"family_index": 1, + "per_family_instance_index": 0, + "run_name": "MLIR_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 24, + "real_time": 2.9875528533011675e+01, + "cpu_time": 2.9873963000000003e+01, + "time_unit": "ms" + }, + { + "name": "Buddy_Conv2D/1", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "Buddy_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 738, + "real_time": 1.0141539980523631e+00, + "cpu_time": 1.0141161287262870e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Corr2D_Constant_Padding/1", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "Buddy_Corr2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 393, + "real_time": 1.7817526648138620e+00, + "cpu_time": 1.7816172111959294e+00, + "time_unit": "ms" + }, + { + "name": "OpenCV_Filter2D_Constant_Padding/1", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "OpenCV_Filter2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 255, + "real_time": 2.7528921763102212e+00, + "cpu_time": 2.7528130431372522e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 5, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4756, + "real_time": 1.4574629629831337e-01, + "cpu_time": 1.4573879415475194e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + 
"threads": 1, + "iterations": 2630, + "real_time": 2.6615853828622360e-01, + "cpu_time": 2.6614812585551312e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 7, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 103027, + "real_time": 6.7919607560599695e-03, + "cpu_time": 6.7916588564162781e-03, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "family_index": 8, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 48458, + "real_time": 1.4418768695910375e-02, + "cpu_time": 1.4418251145321751e-02, + "time_unit": "ms" + }, + { + "name": "Buddy_Erosion2D_Constant_Padding/1", + "family_index": 9, + "per_family_instance_index": 0, + "run_name": "Buddy_Erosion2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3152, + "real_time": 2.2244602997271998e-01, + "cpu_time": 2.2243656757614191e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Dilation2D_Constant_Padding/1", + "family_index": 10, + "per_family_instance_index": 0, + "run_name": "Buddy_Dilation2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3142, + "real_time": 2.2203278716565847e-01, + "cpu_time": 2.2202909579885419e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Opening2D_Constant_Padding/1", + "family_index": 11, + "per_family_instance_index": 0, + "run_name": "Buddy_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2069, + "real_time": 3.3419092044904070e-01, + "cpu_time": 
3.3418129096181753e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Closing2D_Constant_Padding/1", + "family_index": 12, + "per_family_instance_index": 0, + "run_name": "Buddy_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2004, + "real_time": 3.3194217050146912e-01, + "cpu_time": 3.3190982884231574e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_TopHat2D_Constant_Padding/1", + "family_index": 13, + "per_family_instance_index": 0, + "run_name": "Buddy_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 753, + "real_time": 9.1564339421105734e-01, + "cpu_time": 9.1559011819389058e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_BottomHat2D_Constant_Padding/1", + "family_index": 14, + "per_family_instance_index": 0, + "run_name": "Buddy_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 762, + "real_time": 8.7081289696177155e-01, + "cpu_time": 8.7080586220472411e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Erode2D_Constant_Padding/1", + "family_index": 15, + "per_family_instance_index": 0, + "run_name": "OpenCV_Erode2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4961, + "real_time": 1.4088589137142596e-01, + "cpu_time": 1.4087717335214653e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Opening2D_Constant_Padding/1", + "family_index": 16, + "per_family_instance_index": 0, + "run_name": "OpenCV_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3152, + "real_time": 2.1964701266042169e-01, + "cpu_time": 2.1964015609137086e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Closing2D_Constant_Padding/1", + "family_index": 17, + 
"per_family_instance_index": 0, + "run_name": "OpenCV_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2470, + "real_time": 2.2777517133878794e-01, + "cpu_time": 2.2776716882591155e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_TopHat2D_Constant_Padding/1", + "family_index": 18, + "per_family_instance_index": 0, + "run_name": "OpenCV_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2673, + "real_time": 2.5705931714488200e-01, + "cpu_time": 2.5705270594837276e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_BottomHat2D_Constant_Padding/1", + "family_index": 19, + "per_family_instance_index": 0, + "run_name": "OpenCV_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2749, + "real_time": 2.5811154434641043e-01, + "cpu_time": 2.5809966496907932e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "family_index": 20, + "per_family_instance_index": 0, + "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2717, + "real_time": 2.5727819030255045e-01, + "cpu_time": 2.5726741663599595e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Dilate2D_Constant_Padding/1", + "family_index": 21, + "per_family_instance_index": 0, + "run_name": "OpenCV_Dilate2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 5082, + "real_time": 1.3795288426992885e-01, + "cpu_time": 1.3794680322707623e-01, + "time_unit": "ms" + } + ] +} diff --git a/test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json 
b/test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json new file mode 100644 index 00000000..8caa890c --- /dev/null +++ b/test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json @@ -0,0 +1,348 @@ +{ + "context": { + "date": "2025-05-26T22:19:15+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./bin/image-processing-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [2.1123,8.58008,8.91211], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "Eigen_Convolve2D/1", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Eigen_Convolve2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 144, + "real_time": 4.8711563124217925e+00, + "cpu_time": 4.8709695138888893e+00, + "time_unit": "ms" + }, + { + "name": "MLIR_Conv2D/1", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "MLIR_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 95, + "real_time": 7.3745613623606534e+00, + "cpu_time": 7.3740730210526300e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Conv2D/1", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "Buddy_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2234, + "real_time": 3.1500383327742615e-01, + "cpu_time": 3.1498164324082373e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Corr2D_Constant_Padding/1", + 
"family_index": 3, + "per_family_instance_index": 0, + "run_name": "Buddy_Corr2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 850, + "real_time": 8.2385407651171960e-01, + "cpu_time": 8.2383643647058880e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Filter2D_Constant_Padding/1", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "OpenCV_Filter2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 546, + "real_time": 1.2790120646848784e+00, + "cpu_time": 1.2789613168498175e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 5, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4747, + "real_time": 1.4571417911770421e-01, + "cpu_time": 1.4570690183273646e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2629, + "real_time": 2.6612855584710304e-01, + "cpu_time": 2.6610386801065034e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 7, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 102542, + "real_time": 6.8208885203973361e-03, + "cpu_time": 6.8207057205827868e-03, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "family_index": 8, + "per_family_instance_index": 0, + "run_name": 
"OpenCV_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 48907, + "real_time": 1.4288110696726128e-02, + "cpu_time": 1.4287668370580897e-02, + "time_unit": "ms" + }, + { + "name": "Buddy_Erosion2D_Constant_Padding/1", + "family_index": 9, + "per_family_instance_index": 0, + "run_name": "Buddy_Erosion2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3132, + "real_time": 2.2415018558388011e-01, + "cpu_time": 2.2413483556832717e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Dilation2D_Constant_Padding/1", + "family_index": 10, + "per_family_instance_index": 0, + "run_name": "Buddy_Dilation2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3121, + "real_time": 2.2181368127670550e-01, + "cpu_time": 2.2180239250240327e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Opening2D_Constant_Padding/1", + "family_index": 11, + "per_family_instance_index": 0, + "run_name": "Buddy_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2059, + "real_time": 3.3811343206891509e-01, + "cpu_time": 3.3810039873725029e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Closing2D_Constant_Padding/1", + "family_index": 12, + "per_family_instance_index": 0, + "run_name": "Buddy_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2107, + "real_time": 3.4437413974061759e-01, + "cpu_time": 3.4436074560987162e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_TopHat2D_Constant_Padding/1", + "family_index": 13, + "per_family_instance_index": 0, + "run_name": "Buddy_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + 
"iterations": 725, + "real_time": 9.0386319263228054e-01, + "cpu_time": 9.0380061931034261e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_BottomHat2D_Constant_Padding/1", + "family_index": 14, + "per_family_instance_index": 0, + "run_name": "Buddy_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 744, + "real_time": 9.1072037485578372e-01, + "cpu_time": 9.1067572983870981e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Erode2D_Constant_Padding/1", + "family_index": 15, + "per_family_instance_index": 0, + "run_name": "OpenCV_Erode2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 5040, + "real_time": 1.3834959014304102e-01, + "cpu_time": 1.3834366051587288e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Opening2D_Constant_Padding/1", + "family_index": 16, + "per_family_instance_index": 0, + "run_name": "OpenCV_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3222, + "real_time": 2.4203569839305569e-01, + "cpu_time": 2.4201785878336499e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Closing2D_Constant_Padding/1", + "family_index": 17, + "per_family_instance_index": 0, + "run_name": "OpenCV_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3151, + "real_time": 2.2163398455094246e-01, + "cpu_time": 2.2162477150111126e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_TopHat2D_Constant_Padding/1", + "family_index": 18, + "per_family_instance_index": 0, + "run_name": "OpenCV_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2758, + "real_time": 2.5459998263622902e-01, + "cpu_time": 2.5458755329949295e-01, + "time_unit": "ms" + }, + { + "name": 
"OpenCV_BottomHat2D_Constant_Padding/1", + "family_index": 19, + "per_family_instance_index": 0, + "run_name": "OpenCV_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2688, + "real_time": 2.5977311763978961e-01, + "cpu_time": 2.5976750632440504e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "family_index": 20, + "per_family_instance_index": 0, + "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2740, + "real_time": 2.5554979084073193e-01, + "cpu_time": 2.5554263649635012e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Dilate2D_Constant_Padding/1", + "family_index": 21, + "per_family_instance_index": 0, + "run_name": "OpenCV_Dilate2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4990, + "real_time": 1.4015850172432248e-01, + "cpu_time": 1.4015310941883707e-01, + "time_unit": "ms" + } + ] +} diff --git a/test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json new file mode 100644 index 00000000..9196ebc8 --- /dev/null +++ b/test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json @@ -0,0 +1,348 @@ +{ + "context": { + "date": "2025-05-26T22:19:39+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./bin/image-processing-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, 
+ { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [2.07275,8.05029,8.72705], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "Eigen_Convolve2D/1", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Eigen_Convolve2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 140, + "real_time": 5.0083273622606486e+00, + "cpu_time": 5.0081867000000004e+00, + "time_unit": "ms" + }, + { + "name": "MLIR_Conv2D/1", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "MLIR_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 95, + "real_time": 7.3605304485873173e+00, + "cpu_time": 7.3600212842105277e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Conv2D/1", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "Buddy_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2233, + "real_time": 3.1330563174706644e-01, + "cpu_time": 3.1328374115539637e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Corr2D_Constant_Padding/1", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "Buddy_Corr2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 856, + "real_time": 8.1252799778480400e-01, + "cpu_time": 8.1250713200934543e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Filter2D_Constant_Padding/1", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "OpenCV_Filter2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 547, + "real_time": 1.2758781748672290e+00, + "cpu_time": 1.2757829634369293e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 
5, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4751, + "real_time": 1.4585499139904498e-01, + "cpu_time": 1.4584467838349827e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2631, + "real_time": 2.6639430358558497e-01, + "cpu_time": 2.6637908627898133e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 7, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 103135, + "real_time": 6.7804008722305298e-03, + "cpu_time": 6.7801764095602830e-03, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "family_index": 8, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 48881, + "real_time": 1.4308667188897338e-02, + "cpu_time": 1.4308491438391182e-02, + "time_unit": "ms" + }, + { + "name": "Buddy_Erosion2D_Constant_Padding/1", + "family_index": 9, + "per_family_instance_index": 0, + "run_name": "Buddy_Erosion2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3158, + "real_time": 2.2048866075473619e-01, + "cpu_time": 2.2047716117796104e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Dilation2D_Constant_Padding/1", + "family_index": 10, + "per_family_instance_index": 0, + "run_name": 
"Buddy_Dilation2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3109, + "real_time": 2.2516667965084228e-01, + "cpu_time": 2.2515504277902859e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Opening2D_Constant_Padding/1", + "family_index": 11, + "per_family_instance_index": 0, + "run_name": "Buddy_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2007, + "real_time": 3.4035718988202135e-01, + "cpu_time": 3.4034878375685157e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Closing2D_Constant_Padding/1", + "family_index": 12, + "per_family_instance_index": 0, + "run_name": "Buddy_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2062, + "real_time": 3.4142527042749898e-01, + "cpu_time": 3.4140258583899064e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_TopHat2D_Constant_Padding/1", + "family_index": 13, + "per_family_instance_index": 0, + "run_name": "Buddy_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 732, + "real_time": 9.0427116282243547e-01, + "cpu_time": 9.0420603961748647e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_BottomHat2D_Constant_Padding/1", + "family_index": 14, + "per_family_instance_index": 0, + "run_name": "Buddy_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 731, + "real_time": 9.0717502631420310e-01, + "cpu_time": 9.0713226538987568e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Erode2D_Constant_Padding/1", + "family_index": 15, + "per_family_instance_index": 0, + "run_name": "OpenCV_Erode2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + 
"iterations": 5071, + "real_time": 1.3829381951705588e-01, + "cpu_time": 1.3829104515874571e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Opening2D_Constant_Padding/1", + "family_index": 16, + "per_family_instance_index": 0, + "run_name": "OpenCV_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3192, + "real_time": 2.1901250266024194e-01, + "cpu_time": 2.1900474968671726e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Closing2D_Constant_Padding/1", + "family_index": 17, + "per_family_instance_index": 0, + "run_name": "OpenCV_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3221, + "real_time": 2.1656987814476164e-01, + "cpu_time": 2.1656167028872991e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_TopHat2D_Constant_Padding/1", + "family_index": 18, + "per_family_instance_index": 0, + "run_name": "OpenCV_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2767, + "real_time": 2.5171531484818088e-01, + "cpu_time": 2.5171004878930253e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_BottomHat2D_Constant_Padding/1", + "family_index": 19, + "per_family_instance_index": 0, + "run_name": "OpenCV_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2752, + "real_time": 2.5393080567334625e-01, + "cpu_time": 2.5391998219476775e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "family_index": 20, + "per_family_instance_index": 0, + "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2708, + "real_time": 2.5875106429133671e-01, + "cpu_time": 2.5874283825701655e-01, + "time_unit": "ms" + }, + 
{ + "name": "OpenCV_Dilate2D_Constant_Padding/1", + "family_index": 21, + "per_family_instance_index": 0, + "run_name": "OpenCV_Dilate2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4910, + "real_time": 1.4278720381485713e-01, + "cpu_time": 1.4278435274949061e-01, + "time_unit": "ms" + } + ] +} diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json new file mode 100644 index 00000000..67855d27 --- /dev/null +++ b/test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json @@ -0,0 +1,348 @@ +{ + "context": { + "date": "2025-05-26T22:20:03+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./bin/image-processing-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [2.05127,7.65771,8.58301], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "Eigen_Convolve2D/1", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Eigen_Convolve2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 139, + "real_time": 5.0038629271786847e+00, + "cpu_time": 5.0033628776978416e+00, + "time_unit": "ms" + }, + { + "name": "MLIR_Conv2D/1", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "MLIR_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + 
"iterations": 95, + "real_time": 7.3803350721534926e+00, + "cpu_time": 7.3799033789473709e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Conv2D/1", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "Buddy_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2239, + "real_time": 3.1316074467413446e-01, + "cpu_time": 3.1314651808843225e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Corr2D_Constant_Padding/1", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "Buddy_Corr2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 863, + "real_time": 8.1503567386198428e-01, + "cpu_time": 8.1501769061413665e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Filter2D_Constant_Padding/1", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "OpenCV_Filter2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 548, + "real_time": 1.2776841457090238e+00, + "cpu_time": 1.2776387828467155e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 5, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4797, + "real_time": 1.4566776900085679e-01, + "cpu_time": 1.4565851615593084e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2637, + "real_time": 2.6631619319302774e-01, + "cpu_time": 2.6631011452408027e-01, + "time_unit": "ms" + }, + { + "name": 
"OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 7, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 103585, + "real_time": 6.7529736044738674e-03, + "cpu_time": 6.7528078100110968e-03, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "family_index": 8, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 48856, + "real_time": 1.4314036464081647e-02, + "cpu_time": 1.4313678872605214e-02, + "time_unit": "ms" + }, + { + "name": "Buddy_Erosion2D_Constant_Padding/1", + "family_index": 9, + "per_family_instance_index": 0, + "run_name": "Buddy_Erosion2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3141, + "real_time": 2.2273098908196357e-01, + "cpu_time": 2.2271567589939528e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Dilation2D_Constant_Padding/1", + "family_index": 10, + "per_family_instance_index": 0, + "run_name": "Buddy_Dilation2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3162, + "real_time": 2.2138736713241733e-01, + "cpu_time": 2.2137922707147351e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Opening2D_Constant_Padding/1", + "family_index": 11, + "per_family_instance_index": 0, + "run_name": "Buddy_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2082, + "real_time": 3.3552730303905426e-01, + "cpu_time": 3.3551034341978869e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Closing2D_Constant_Padding/1", + "family_index": 12, + "per_family_instance_index": 0, + 
"run_name": "Buddy_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2093, + "real_time": 3.2841352896290538e-01, + "cpu_time": 3.2839717821309145e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_TopHat2D_Constant_Padding/1", + "family_index": 13, + "per_family_instance_index": 0, + "run_name": "Buddy_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 727, + "real_time": 9.1686937061267837e-01, + "cpu_time": 9.1678499724896845e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_BottomHat2D_Constant_Padding/1", + "family_index": 14, + "per_family_instance_index": 0, + "run_name": "Buddy_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 732, + "real_time": 9.1540414962123651e-01, + "cpu_time": 9.1536734972677514e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Erode2D_Constant_Padding/1", + "family_index": 15, + "per_family_instance_index": 0, + "run_name": "OpenCV_Erode2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 5002, + "real_time": 1.3968796960440505e-01, + "cpu_time": 1.3968174870051955e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Opening2D_Constant_Padding/1", + "family_index": 16, + "per_family_instance_index": 0, + "run_name": "OpenCV_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3104, + "real_time": 2.2470024378355785e-01, + "cpu_time": 2.2469779349226782e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Closing2D_Constant_Padding/1", + "family_index": 17, + "per_family_instance_index": 0, + "run_name": "OpenCV_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, 
+ "iterations": 3100, + "real_time": 2.2293068047973416e-01, + "cpu_time": 2.2292174161290265e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_TopHat2D_Constant_Padding/1", + "family_index": 18, + "per_family_instance_index": 0, + "run_name": "OpenCV_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2764, + "real_time": 2.5439042700441117e-01, + "cpu_time": 2.5438529232995610e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_BottomHat2D_Constant_Padding/1", + "family_index": 19, + "per_family_instance_index": 0, + "run_name": "OpenCV_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2700, + "real_time": 2.6047515372435254e-01, + "cpu_time": 2.6046163111111165e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "family_index": 20, + "per_family_instance_index": 0, + "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2725, + "real_time": 2.5558024166374033e-01, + "cpu_time": 2.5557356660550518e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Dilate2D_Constant_Padding/1", + "family_index": 21, + "per_family_instance_index": 0, + "run_name": "OpenCV_Dilate2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4989, + "real_time": 1.3989395721268383e-01, + "cpu_time": 1.3988953577871338e-01, + "time_unit": "ms" + } + ] +} diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json new file mode 100644 index 00000000..6960ba2a --- /dev/null +++ 
b/test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json @@ -0,0 +1,348 @@ +{ + "context": { + "date": "2025-05-26T22:20:27+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./bin/image-processing-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [2.09033,7.21777,8.41211], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "Eigen_Convolve2D/1", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Eigen_Convolve2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 140, + "real_time": 4.9988453675593645e+00, + "cpu_time": 4.9986953928571429e+00, + "time_unit": "ms" + }, + { + "name": "MLIR_Conv2D/1", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "MLIR_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 95, + "real_time": 7.3694647730965359e+00, + "cpu_time": 7.3690518526315794e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Conv2D/1", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "Buddy_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2227, + "real_time": 3.1326443532362186e-01, + "cpu_time": 3.1325685136955544e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Corr2D_Constant_Padding/1", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "Buddy_Corr2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 
0, + "threads": 1, + "iterations": 857, + "real_time": 8.1497179333429215e-01, + "cpu_time": 8.1495055892648771e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Filter2D_Constant_Padding/1", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "OpenCV_Filter2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 548, + "real_time": 1.2764532125833696e+00, + "cpu_time": 1.2764195474452551e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 5, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4786, + "real_time": 1.4544369842011809e-01, + "cpu_time": 1.4543611554534056e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2621, + "real_time": 2.6697471229027631e-01, + "cpu_time": 2.6696165433040830e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 7, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 103139, + "real_time": 6.7890966993477686e-03, + "cpu_time": 6.7888171981500727e-03, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "family_index": 8, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 48916, + "real_time": 
1.4305655840777915e-02, + "cpu_time": 1.4305205740453016e-02, + "time_unit": "ms" + }, + { + "name": "Buddy_Erosion2D_Constant_Padding/1", + "family_index": 9, + "per_family_instance_index": 0, + "run_name": "Buddy_Erosion2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3055, + "real_time": 2.2843102764478487e-01, + "cpu_time": 2.2842420523731577e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Dilation2D_Constant_Padding/1", + "family_index": 10, + "per_family_instance_index": 0, + "run_name": "Buddy_Dilation2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3073, + "real_time": 2.6378849096380458e-01, + "cpu_time": 2.6377914480963227e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Opening2D_Constant_Padding/1", + "family_index": 11, + "per_family_instance_index": 0, + "run_name": "Buddy_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1855, + "real_time": 3.8777429439790168e-01, + "cpu_time": 3.8775405013477082e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Closing2D_Constant_Padding/1", + "family_index": 12, + "per_family_instance_index": 0, + "run_name": "Buddy_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2033, + "real_time": 3.7358192460354012e-01, + "cpu_time": 3.7357057255287696e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_TopHat2D_Constant_Padding/1", + "family_index": 13, + "per_family_instance_index": 0, + "run_name": "Buddy_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 727, + "real_time": 9.1089915479065786e-01, + "cpu_time": 9.1081009628610654e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_BottomHat2D_Constant_Padding/1", 
+ "family_index": 14, + "per_family_instance_index": 0, + "run_name": "Buddy_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 732, + "real_time": 8.9685928809349658e-01, + "cpu_time": 8.9679970081967098e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Erode2D_Constant_Padding/1", + "family_index": 15, + "per_family_instance_index": 0, + "run_name": "OpenCV_Erode2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 5021, + "real_time": 1.3921439745610892e-01, + "cpu_time": 1.3920651204939244e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Opening2D_Constant_Padding/1", + "family_index": 16, + "per_family_instance_index": 0, + "run_name": "OpenCV_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3154, + "real_time": 2.2428087977551930e-01, + "cpu_time": 2.2427543595434415e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Closing2D_Constant_Padding/1", + "family_index": 17, + "per_family_instance_index": 0, + "run_name": "OpenCV_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3178, + "real_time": 2.1991694007300971e-01, + "cpu_time": 2.1990786815607272e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_TopHat2D_Constant_Padding/1", + "family_index": 18, + "per_family_instance_index": 0, + "run_name": "OpenCV_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2759, + "real_time": 2.5378466030530800e-01, + "cpu_time": 2.5377213990576297e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_BottomHat2D_Constant_Padding/1", + "family_index": 19, + "per_family_instance_index": 0, + "run_name": "OpenCV_BottomHat2D_Constant_Padding/1", + "run_type": 
"iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2757, + "real_time": 2.5706717695150422e-01, + "cpu_time": 2.5705736452665900e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "family_index": 20, + "per_family_instance_index": 0, + "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2734, + "real_time": 2.5537159994255476e-01, + "cpu_time": 2.5536476700804656e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Dilate2D_Constant_Padding/1", + "family_index": 21, + "per_family_instance_index": 0, + "run_name": "OpenCV_Dilate2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 5010, + "real_time": 1.3908601129899720e-01, + "cpu_time": 1.3908382435129729e-01, + "time_unit": "ms" + } + ] +} diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json new file mode 100644 index 00000000..72e90414 --- /dev/null +++ b/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json @@ -0,0 +1,348 @@ +{ + "context": { + "date": "2025-05-26T22:20:51+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./bin/image-processing-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [2.05859,6.79785,8.24023], + "library_build_type": "release" + 
}, + "benchmarks": [ + { + "name": "Eigen_Convolve2D/1", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Eigen_Convolve2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 60, + "real_time": 1.1761240319659313e+01, + "cpu_time": 1.1760665933333334e+01, + "time_unit": "ms" + }, + { + "name": "MLIR_Conv2D/1", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "MLIR_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 23, + "real_time": 2.9861296891518261e+01, + "cpu_time": 2.9859531086956522e+01, + "time_unit": "ms" + }, + { + "name": "Buddy_Conv2D/1", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "Buddy_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 524, + "real_time": 1.2968991285915139e+00, + "cpu_time": 1.2968563473282440e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Corr2D_Constant_Padding/1", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "Buddy_Corr2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 294, + "real_time": 2.3691147109683679e+00, + "cpu_time": 2.3690150918367365e+00, + "time_unit": "ms" + }, + { + "name": "OpenCV_Filter2D_Constant_Padding/1", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "OpenCV_Filter2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 166, + "real_time": 4.1967903499502732e+00, + "cpu_time": 4.1966355481927673e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 5, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + 
"repetition_index": 0, + "threads": 1, + "iterations": 4766, + "real_time": 1.4561849387938666e-01, + "cpu_time": 1.4560848174569879e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2632, + "real_time": 2.6634444856285866e-01, + "cpu_time": 2.6633077697568358e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 7, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 102992, + "real_time": 6.7888280357693833e-03, + "cpu_time": 6.7885894729687781e-03, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "family_index": 8, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 48914, + "real_time": 1.4305232184815593e-02, + "cpu_time": 1.4304699186327010e-02, + "time_unit": "ms" + }, + { + "name": "Buddy_Erosion2D_Constant_Padding/1", + "family_index": 9, + "per_family_instance_index": 0, + "run_name": "Buddy_Erosion2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3110, + "real_time": 2.2340725712070894e-01, + "cpu_time": 2.2339657749196118e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Dilation2D_Constant_Padding/1", + "family_index": 10, + "per_family_instance_index": 0, + "run_name": "Buddy_Dilation2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3094, + "real_time": 
2.2614223890984220e-01, + "cpu_time": 2.2612052133160956e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Opening2D_Constant_Padding/1", + "family_index": 11, + "per_family_instance_index": 0, + "run_name": "Buddy_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1992, + "real_time": 3.5395631080411044e-01, + "cpu_time": 3.5392976907630508e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Closing2D_Constant_Padding/1", + "family_index": 12, + "per_family_instance_index": 0, + "run_name": "Buddy_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1972, + "real_time": 3.4787828522086384e-01, + "cpu_time": 3.4785186156186626e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_TopHat2D_Constant_Padding/1", + "family_index": 13, + "per_family_instance_index": 0, + "run_name": "Buddy_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 731, + "real_time": 9.0749642305922085e-01, + "cpu_time": 9.0745977838577285e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_BottomHat2D_Constant_Padding/1", + "family_index": 14, + "per_family_instance_index": 0, + "run_name": "Buddy_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 740, + "real_time": 9.0477319837019254e-01, + "cpu_time": 9.0473099999999862e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Erode2D_Constant_Padding/1", + "family_index": 15, + "per_family_instance_index": 0, + "run_name": "OpenCV_Erode2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 5054, + "real_time": 1.3808638364560150e-01, + "cpu_time": 1.3808249604273823e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Opening2D_Constant_Padding/1", + 
"family_index": 16, + "per_family_instance_index": 0, + "run_name": "OpenCV_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3141, + "real_time": 2.2523653856871501e-01, + "cpu_time": 2.2523184909264546e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Closing2D_Constant_Padding/1", + "family_index": 17, + "per_family_instance_index": 0, + "run_name": "OpenCV_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3119, + "real_time": 2.2439105939639611e-01, + "cpu_time": 2.2438301987816528e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_TopHat2D_Constant_Padding/1", + "family_index": 18, + "per_family_instance_index": 0, + "run_name": "OpenCV_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2692, + "real_time": 2.8906816154452702e-01, + "cpu_time": 2.8906200445765301e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_BottomHat2D_Constant_Padding/1", + "family_index": 19, + "per_family_instance_index": 0, + "run_name": "OpenCV_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2695, + "real_time": 2.6125356983497106e-01, + "cpu_time": 2.6124337476808834e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "family_index": 20, + "per_family_instance_index": 0, + "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2680, + "real_time": 2.6039684177445832e-01, + "cpu_time": 2.6038943208955156e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Dilate2D_Constant_Padding/1", + "family_index": 21, + "per_family_instance_index": 0, + "run_name": "OpenCV_Dilate2D_Constant_Padding/1", + "run_type": 
"iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 5013, + "real_time": 1.3913510936880311e-01, + "cpu_time": 1.3912800039896264e-01, + "time_unit": "ms" + } + ] +} diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json new file mode 100644 index 00000000..6ffdee9c --- /dev/null +++ b/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json @@ -0,0 +1,348 @@ +{ + "context": { + "date": "2025-05-26T22:21:15+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./bin/image-processing-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [2.0376,6.41113,8.07324], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "Eigen_Convolve2D/1", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Eigen_Convolve2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 59, + "real_time": 1.1810072113649319e+01, + "cpu_time": 1.1809691406779663e+01, + "time_unit": "ms" + }, + { + "name": "MLIR_Conv2D/1", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "MLIR_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 24, + "real_time": 2.9850440565496683e+01, + "cpu_time": 2.9848709291666665e+01, + "time_unit": "ms" + }, + { + "name": "Buddy_Conv2D/1", + "family_index": 2, + 
"per_family_instance_index": 0, + "run_name": "Buddy_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 542, + "real_time": 1.3058845338676248e+00, + "cpu_time": 1.3058292250922505e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Corr2D_Constant_Padding/1", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "Buddy_Corr2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 293, + "real_time": 2.3760891967666029e+00, + "cpu_time": 2.3760385187713329e+00, + "time_unit": "ms" + }, + { + "name": "OpenCV_Filter2D_Constant_Padding/1", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "OpenCV_Filter2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 166, + "real_time": 4.2043444307812727e+00, + "cpu_time": 4.2041359457831291e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 5, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4777, + "real_time": 1.4558693665838751e-01, + "cpu_time": 1.4558089763449858e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2635, + "real_time": 2.6631031602451427e-01, + "cpu_time": 2.6628476432637554e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 7, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": 
"iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 102983, + "real_time": 6.7953448704826435e-03, + "cpu_time": 6.7950746045463790e-03, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "family_index": 8, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 48881, + "real_time": 1.4309612515319918e-02, + "cpu_time": 1.4309047564493374e-02, + "time_unit": "ms" + }, + { + "name": "Buddy_Erosion2D_Constant_Padding/1", + "family_index": 9, + "per_family_instance_index": 0, + "run_name": "Buddy_Erosion2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3156, + "real_time": 2.2085438776378849e-01, + "cpu_time": 2.2084482160963267e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Dilation2D_Constant_Padding/1", + "family_index": 10, + "per_family_instance_index": 0, + "run_name": "Buddy_Dilation2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3159, + "real_time": 2.2232111032796306e-01, + "cpu_time": 2.2231153276353291e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Opening2D_Constant_Padding/1", + "family_index": 11, + "per_family_instance_index": 0, + "run_name": "Buddy_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2113, + "real_time": 3.3031660275356611e-01, + "cpu_time": 3.3030707903454742e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Closing2D_Constant_Padding/1", + "family_index": 12, + "per_family_instance_index": 0, + "run_name": "Buddy_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2087, + "real_time": 
3.3365085249776527e-01, + "cpu_time": 3.3363314374700537e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_TopHat2D_Constant_Padding/1", + "family_index": 13, + "per_family_instance_index": 0, + "run_name": "Buddy_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 736, + "real_time": 8.7698101339375845e-01, + "cpu_time": 8.7694497826086870e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_BottomHat2D_Constant_Padding/1", + "family_index": 14, + "per_family_instance_index": 0, + "run_name": "Buddy_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 740, + "real_time": 8.9143257638489881e-01, + "cpu_time": 8.9139059729729575e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Erode2D_Constant_Padding/1", + "family_index": 15, + "per_family_instance_index": 0, + "run_name": "OpenCV_Erode2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 5026, + "real_time": 1.3946750417345793e-01, + "cpu_time": 1.3946417011539988e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Opening2D_Constant_Padding/1", + "family_index": 16, + "per_family_instance_index": 0, + "run_name": "OpenCV_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3102, + "real_time": 2.2649793296591225e-01, + "cpu_time": 2.2648724597034239e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Closing2D_Constant_Padding/1", + "family_index": 17, + "per_family_instance_index": 0, + "run_name": "OpenCV_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3074, + "real_time": 2.2739966605379741e-01, + "cpu_time": 2.2739106668835357e-01, + "time_unit": "ms" + }, + { + "name": 
"OpenCV_TopHat2D_Constant_Padding/1", + "family_index": 18, + "per_family_instance_index": 0, + "run_name": "OpenCV_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2684, + "real_time": 2.5760428355409920e-01, + "cpu_time": 2.5759685879284627e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_BottomHat2D_Constant_Padding/1", + "family_index": 19, + "per_family_instance_index": 0, + "run_name": "OpenCV_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2660, + "real_time": 2.6262844877695679e-01, + "cpu_time": 2.6261966090225614e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "family_index": 20, + "per_family_instance_index": 0, + "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2685, + "real_time": 2.6083636883250827e-01, + "cpu_time": 2.6082749199255079e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Dilate2D_Constant_Padding/1", + "family_index": 21, + "per_family_instance_index": 0, + "run_name": "OpenCV_Dilate2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4970, + "real_time": 1.4014078053070506e-01, + "cpu_time": 1.4013356338028188e-01, + "time_unit": "ms" + } + ] +} diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json new file mode 100644 index 00000000..635d3b03 --- /dev/null +++ b/test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json @@ -0,0 +1,348 @@ +{ + "context": { + "date": "2025-05-26T22:21:40+00:00", + "host_name": "4ed4bacfe45d", + 
"executable": "./bin/image-processing-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [2.02344,6.05566,7.91064], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "Eigen_Convolve2D/1", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Eigen_Convolve2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 31, + "real_time": 2.2180669430282808e+01, + "cpu_time": 2.2179082290322576e+01, + "time_unit": "ms" + }, + { + "name": "MLIR_Conv2D/1", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "MLIR_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 10, + "real_time": 6.8137688748538494e+01, + "cpu_time": 6.8135333700000004e+01, + "time_unit": "ms" + }, + { + "name": "Buddy_Conv2D/1", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "Buddy_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 300, + "real_time": 2.3162953307231269e+00, + "cpu_time": 2.3161617966666670e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Corr2D_Constant_Padding/1", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "Buddy_Corr2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 148, + "real_time": 4.7046484526347472e+00, + "cpu_time": 4.7044905743243222e+00, + "time_unit": "ms" + }, + { + "name": "OpenCV_Filter2D_Constant_Padding/1", + "family_index": 4, + 
"per_family_instance_index": 0, + "run_name": "OpenCV_Filter2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 79, + "real_time": 8.8026538915649244e+00, + "cpu_time": 8.8021716329113904e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 5, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4765, + "real_time": 1.4569725829556757e-01, + "cpu_time": 1.4568953284365163e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2606, + "real_time": 2.6624611809746812e-01, + "cpu_time": 2.6623398656945507e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 7, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 103232, + "real_time": 6.7792688108119200e-03, + "cpu_time": 6.7791017320210810e-03, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "family_index": 8, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 48866, + "real_time": 1.4305006073369965e-02, + "cpu_time": 1.4304623807964635e-02, + "time_unit": "ms" + }, + { + "name": "Buddy_Erosion2D_Constant_Padding/1", + "family_index": 9, + "per_family_instance_index": 0, + "run_name": 
"Buddy_Erosion2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3108, + "real_time": 2.2251177711011974e-01, + "cpu_time": 2.2250371010296024e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Dilation2D_Constant_Padding/1", + "family_index": 10, + "per_family_instance_index": 0, + "run_name": "Buddy_Dilation2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3124, + "real_time": 2.2213796699817903e-01, + "cpu_time": 2.2213028777208693e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Opening2D_Constant_Padding/1", + "family_index": 11, + "per_family_instance_index": 0, + "run_name": "Buddy_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1961, + "real_time": 3.3011947117825663e-01, + "cpu_time": 3.3010722947475762e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Closing2D_Constant_Padding/1", + "family_index": 12, + "per_family_instance_index": 0, + "run_name": "Buddy_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2094, + "real_time": 3.4953010870711282e-01, + "cpu_time": 3.4951667430754529e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_TopHat2D_Constant_Padding/1", + "family_index": 13, + "per_family_instance_index": 0, + "run_name": "Buddy_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 764, + "real_time": 8.9931203764302570e-01, + "cpu_time": 8.9927609816753939e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_BottomHat2D_Constant_Padding/1", + "family_index": 14, + "per_family_instance_index": 0, + "run_name": "Buddy_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + 
"iterations": 784, + "real_time": 8.8094642186271288e-01, + "cpu_time": 8.8090844642857091e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Erode2D_Constant_Padding/1", + "family_index": 15, + "per_family_instance_index": 0, + "run_name": "OpenCV_Erode2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4984, + "real_time": 1.4027801826656056e-01, + "cpu_time": 1.4027250120385235e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Opening2D_Constant_Padding/1", + "family_index": 16, + "per_family_instance_index": 0, + "run_name": "OpenCV_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3103, + "real_time": 2.2453489277272926e-01, + "cpu_time": 2.2452527747341289e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Closing2D_Constant_Padding/1", + "family_index": 17, + "per_family_instance_index": 0, + "run_name": "OpenCV_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3089, + "real_time": 2.2432715479765714e-01, + "cpu_time": 2.2432135351246352e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_TopHat2D_Constant_Padding/1", + "family_index": 18, + "per_family_instance_index": 0, + "run_name": "OpenCV_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2707, + "real_time": 2.5748399006822781e-01, + "cpu_time": 2.5747665644625017e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_BottomHat2D_Constant_Padding/1", + "family_index": 19, + "per_family_instance_index": 0, + "run_name": "OpenCV_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2743, + "real_time": 2.5845320051250081e-01, + "cpu_time": 2.5844790521327066e-01, + "time_unit": "ms" + }, + { + 
"name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "family_index": 20, + "per_family_instance_index": 0, + "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2695, + "real_time": 2.6067187923226154e-01, + "cpu_time": 2.6066210909090842e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Dilate2D_Constant_Padding/1", + "family_index": 21, + "per_family_instance_index": 0, + "run_name": "OpenCV_Dilate2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4997, + "real_time": 1.3992553683061562e-01, + "cpu_time": 1.3991992815689469e-01, + "time_unit": "ms" + } + ] +} diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json new file mode 100644 index 00000000..8f78eddd --- /dev/null +++ b/test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json @@ -0,0 +1,348 @@ +{ + "context": { + "date": "2025-05-26T22:22:04+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./bin/image-processing-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [2.01465,5.729,7.75244], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "Eigen_Convolve2D/1", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Eigen_Convolve2D/1", + "run_type": "iteration", + "repetitions": 1, + 
"repetition_index": 0, + "threads": 1, + "iterations": 32, + "real_time": 2.1972846647258848e+01, + "cpu_time": 2.1971798968749997e+01, + "time_unit": "ms" + }, + { + "name": "MLIR_Conv2D/1", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "MLIR_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 10, + "real_time": 6.8126448430120945e+01, + "cpu_time": 6.8123335000000012e+01, + "time_unit": "ms" + }, + { + "name": "Buddy_Conv2D/1", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "Buddy_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 304, + "real_time": 2.2191465893564257e+00, + "cpu_time": 2.2190450559210526e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Corr2D_Constant_Padding/1", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "Buddy_Corr2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 149, + "real_time": 4.6995133036535055e+00, + "cpu_time": 4.6994745369127537e+00, + "time_unit": "ms" + }, + { + "name": "OpenCV_Filter2D_Constant_Padding/1", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "OpenCV_Filter2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 79, + "real_time": 8.7998903156081330e+00, + "cpu_time": 8.7994201518987403e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 5, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4719, + "real_time": 1.4579868286336523e-01, + "cpu_time": 1.4579433396906127e-01, + "time_unit": "ms" + }, + { + "name": 
"Buddy_Resize2D_Bilinear_Interpolation/1", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2629, + "real_time": 2.6617135984356205e-01, + "cpu_time": 2.6616194903004953e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 7, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 102880, + "real_time": 6.7913589396544419e-03, + "cpu_time": 6.7911381706842900e-03, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "family_index": 8, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 48902, + "real_time": 1.4305065534660009e-02, + "cpu_time": 1.4304335098768956e-02, + "time_unit": "ms" + }, + { + "name": "Buddy_Erosion2D_Constant_Padding/1", + "family_index": 9, + "per_family_instance_index": 0, + "run_name": "Buddy_Erosion2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3148, + "real_time": 2.2171691635190455e-01, + "cpu_time": 2.2170603208386286e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Dilation2D_Constant_Padding/1", + "family_index": 10, + "per_family_instance_index": 0, + "run_name": "Buddy_Dilation2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3112, + "real_time": 2.2349217224262827e-01, + "cpu_time": 2.2347660314910001e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Opening2D_Constant_Padding/1", + "family_index": 11, + 
"per_family_instance_index": 0, + "run_name": "Buddy_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1703, + "real_time": 4.0066563012980744e-01, + "cpu_time": 4.0064513623018178e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Closing2D_Constant_Padding/1", + "family_index": 12, + "per_family_instance_index": 0, + "run_name": "Buddy_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1933, + "real_time": 3.7044749353842232e-01, + "cpu_time": 3.7041857320227567e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_TopHat2D_Constant_Padding/1", + "family_index": 13, + "per_family_instance_index": 0, + "run_name": "Buddy_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 724, + "real_time": 9.4048063224438805e-01, + "cpu_time": 9.4042722513812205e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_BottomHat2D_Constant_Padding/1", + "family_index": 14, + "per_family_instance_index": 0, + "run_name": "Buddy_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 704, + "real_time": 9.0413066209293902e-01, + "cpu_time": 9.0407184517045247e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Erode2D_Constant_Padding/1", + "family_index": 15, + "per_family_instance_index": 0, + "run_name": "OpenCV_Erode2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 5045, + "real_time": 1.3871030075285432e-01, + "cpu_time": 1.3870866759167522e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Opening2D_Constant_Padding/1", + "family_index": 16, + "per_family_instance_index": 0, + "run_name": "OpenCV_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + 
"repetition_index": 0, + "threads": 1, + "iterations": 3039, + "real_time": 2.2786769644529187e-01, + "cpu_time": 2.2785653142481077e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Closing2D_Constant_Padding/1", + "family_index": 17, + "per_family_instance_index": 0, + "run_name": "OpenCV_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3127, + "real_time": 2.2779202173893773e-01, + "cpu_time": 2.2778226670930599e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_TopHat2D_Constant_Padding/1", + "family_index": 18, + "per_family_instance_index": 0, + "run_name": "OpenCV_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2665, + "real_time": 2.6074036131097494e-01, + "cpu_time": 2.6072306228893072e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_BottomHat2D_Constant_Padding/1", + "family_index": 19, + "per_family_instance_index": 0, + "run_name": "OpenCV_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2664, + "real_time": 2.6070837815244635e-01, + "cpu_time": 2.6070265427927913e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "family_index": 20, + "per_family_instance_index": 0, + "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2666, + "real_time": 2.6246782640988719e-01, + "cpu_time": 2.6245429219804978e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Dilate2D_Constant_Padding/1", + "family_index": 21, + "per_family_instance_index": 0, + "run_name": "OpenCV_Dilate2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4964, + "real_time": 1.4095451220521804e-01, + "cpu_time": 
1.4094873912167652e-01, + "time_unit": "ms" + } + ] +} diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json new file mode 100644 index 00000000..3b314876 --- /dev/null +++ b/test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json @@ -0,0 +1,348 @@ +{ + "context": { + "date": "2025-05-26T22:22:28+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./bin/image-processing-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [2.00977,5.48682,7.62891], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "Eigen_Convolve2D/1", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Eigen_Convolve2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 20, + "real_time": 3.5165282245725393e+01, + "cpu_time": 3.5164104549999998e+01, + "time_unit": "ms" + }, + { + "name": "MLIR_Conv2D/1", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "MLIR_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 6, + "real_time": 1.2213348659376304e+02, + "cpu_time": 1.2212578466666663e+02, + "time_unit": "ms" + }, + { + "name": "Buddy_Conv2D/1", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "Buddy_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + 
"iterations": 167, + "real_time": 4.2077098376379753e+00, + "cpu_time": 4.2074904550898209e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Corr2D_Constant_Padding/1", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "Buddy_Corr2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 89, + "real_time": 7.8985005868284892e+00, + "cpu_time": 7.8981263483146078e+00, + "time_unit": "ms" + }, + { + "name": "OpenCV_Filter2D_Constant_Padding/1", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "OpenCV_Filter2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 118, + "real_time": 5.9179596729197748e+00, + "cpu_time": 5.9176466271186516e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 5, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4760, + "real_time": 1.4632203725769238e-01, + "cpu_time": 1.4631159096638646e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2636, + "real_time": 2.6651023998743484e-01, + "cpu_time": 2.6649754059180569e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 7, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 103065, + "real_time": 6.8117591128138560e-03, + "cpu_time": 
6.8116361713481842e-03, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "family_index": 8, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 47744, + "real_time": 1.4486571433067561e-02, + "cpu_time": 1.4486011121816375e-02, + "time_unit": "ms" + }, + { + "name": "Buddy_Erosion2D_Constant_Padding/1", + "family_index": 9, + "per_family_instance_index": 0, + "run_name": "Buddy_Erosion2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3166, + "real_time": 2.2039369668809206e-01, + "cpu_time": 2.2038528837650059e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Dilation2D_Constant_Padding/1", + "family_index": 10, + "per_family_instance_index": 0, + "run_name": "Buddy_Dilation2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3164, + "real_time": 2.2002138955666956e-01, + "cpu_time": 2.2001238084702895e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Opening2D_Constant_Padding/1", + "family_index": 11, + "per_family_instance_index": 0, + "run_name": "Buddy_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2083, + "real_time": 3.2940170711127259e-01, + "cpu_time": 3.2938720835333585e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Closing2D_Constant_Padding/1", + "family_index": 12, + "per_family_instance_index": 0, + "run_name": "Buddy_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2139, + "real_time": 3.3093405893580163e-01, + "cpu_time": 3.3092209537166950e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_TopHat2D_Constant_Padding/1", + "family_index": 13, + 
"per_family_instance_index": 0, + "run_name": "Buddy_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 739, + "real_time": 8.7742192143919662e-01, + "cpu_time": 8.7732872395128714e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_BottomHat2D_Constant_Padding/1", + "family_index": 14, + "per_family_instance_index": 0, + "run_name": "Buddy_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 755, + "real_time": 8.6296645566722419e-01, + "cpu_time": 8.6293647549668850e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Erode2D_Constant_Padding/1", + "family_index": 15, + "per_family_instance_index": 0, + "run_name": "OpenCV_Erode2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 5029, + "real_time": 1.3867238168688933e-01, + "cpu_time": 1.3867003002585018e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Opening2D_Constant_Padding/1", + "family_index": 16, + "per_family_instance_index": 0, + "run_name": "OpenCV_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3140, + "real_time": 2.1959820489405066e-01, + "cpu_time": 2.1958653949044596e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Closing2D_Constant_Padding/1", + "family_index": 17, + "per_family_instance_index": 0, + "run_name": "OpenCV_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3204, + "real_time": 2.2206443551458074e-01, + "cpu_time": 2.2205557740324600e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_TopHat2D_Constant_Padding/1", + "family_index": 18, + "per_family_instance_index": 0, + "run_name": "OpenCV_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + 
"repetition_index": 0, + "threads": 1, + "iterations": 2744, + "real_time": 2.5502688172694199e-01, + "cpu_time": 2.5501861734693820e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_BottomHat2D_Constant_Padding/1", + "family_index": 19, + "per_family_instance_index": 0, + "run_name": "OpenCV_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2737, + "real_time": 2.5401829891954847e-01, + "cpu_time": 2.5401426963829005e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "family_index": 20, + "per_family_instance_index": 0, + "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2177, + "real_time": 2.5462905149504739e-01, + "cpu_time": 2.5461684565916515e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Dilate2D_Constant_Padding/1", + "family_index": 21, + "per_family_instance_index": 0, + "run_name": "OpenCV_Dilate2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 5097, + "real_time": 1.3730056548799185e-01, + "cpu_time": 1.3729482813419697e-01, + "time_unit": "ms" + } + ] +} diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json new file mode 100644 index 00000000..4d296764 --- /dev/null +++ b/test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json @@ -0,0 +1,348 @@ +{ + "context": { + "date": "2025-05-26T22:22:53+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./bin/image-processing-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + 
"num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [2.00537,5.20605,7.47852], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "Eigen_Convolve2D/1", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Eigen_Convolve2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 20, + "real_time": 3.4849253855645657e+01, + "cpu_time": 3.4847690799999995e+01, + "time_unit": "ms" + }, + { + "name": "MLIR_Conv2D/1", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "MLIR_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 6, + "real_time": 1.2197488794724147e+02, + "cpu_time": 1.2197084233333338e+02, + "time_unit": "ms" + }, + { + "name": "Buddy_Conv2D/1", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "Buddy_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 179, + "real_time": 3.8715138948163506e+00, + "cpu_time": 3.8714169888268164e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Corr2D_Constant_Padding/1", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "Buddy_Corr2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 89, + "real_time": 7.8892975292179024e+00, + "cpu_time": 7.8889599438202236e+00, + "time_unit": "ms" + }, + { + "name": "OpenCV_Filter2D_Constant_Padding/1", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "OpenCV_Filter2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 118, + 
"real_time": 5.9100432889693870e+00, + "cpu_time": 5.9099251355932196e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 5, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4765, + "real_time": 1.4643552483441571e-01, + "cpu_time": 1.4643220839454354e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2629, + "real_time": 2.6632445666353799e-01, + "cpu_time": 2.6630646177253708e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 7, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 102844, + "real_time": 6.7916700202742914e-03, + "cpu_time": 6.7913482167165796e-03, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "family_index": 8, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 48492, + "real_time": 1.4438458006135467e-02, + "cpu_time": 1.4438022684154067e-02, + "time_unit": "ms" + }, + { + "name": "Buddy_Erosion2D_Constant_Padding/1", + "family_index": 9, + "per_family_instance_index": 0, + "run_name": "Buddy_Erosion2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3127, + "real_time": 2.2176959854766587e-01, + "cpu_time": 
2.2174655100735502e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Dilation2D_Constant_Padding/1", + "family_index": 10, + "per_family_instance_index": 0, + "run_name": "Buddy_Dilation2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3155, + "real_time": 2.2122683313493682e-01, + "cpu_time": 2.2121264564183812e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Opening2D_Constant_Padding/1", + "family_index": 11, + "per_family_instance_index": 0, + "run_name": "Buddy_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2000, + "real_time": 3.4897359274327755e-01, + "cpu_time": 3.4894982949999953e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Closing2D_Constant_Padding/1", + "family_index": 12, + "per_family_instance_index": 0, + "run_name": "Buddy_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2036, + "real_time": 3.5520158397044788e-01, + "cpu_time": 3.5518303585461614e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_TopHat2D_Constant_Padding/1", + "family_index": 13, + "per_family_instance_index": 0, + "run_name": "Buddy_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 751, + "real_time": 9.1041019620416008e-01, + "cpu_time": 9.1037048335552573e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_BottomHat2D_Constant_Padding/1", + "family_index": 14, + "per_family_instance_index": 0, + "run_name": "Buddy_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 755, + "real_time": 9.1234293106375941e-01, + "cpu_time": 9.1229672317880861e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Erode2D_Constant_Padding/1", + "family_index": 15, + 
"per_family_instance_index": 0, + "run_name": "OpenCV_Erode2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 5001, + "real_time": 1.3972985711104391e-01, + "cpu_time": 1.3972479084183179e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Opening2D_Constant_Padding/1", + "family_index": 16, + "per_family_instance_index": 0, + "run_name": "OpenCV_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3193, + "real_time": 2.2016344559890813e-01, + "cpu_time": 2.2015363670529223e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Closing2D_Constant_Padding/1", + "family_index": 17, + "per_family_instance_index": 0, + "run_name": "OpenCV_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3193, + "real_time": 2.1923410233998053e-01, + "cpu_time": 2.1922637206389020e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_TopHat2D_Constant_Padding/1", + "family_index": 18, + "per_family_instance_index": 0, + "run_name": "OpenCV_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2734, + "real_time": 2.5621622435655922e-01, + "cpu_time": 2.5620427834674475e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_BottomHat2D_Constant_Padding/1", + "family_index": 19, + "per_family_instance_index": 0, + "run_name": "OpenCV_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2725, + "real_time": 2.5520740845881473e-01, + "cpu_time": 2.5519394238532134e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "family_index": 20, + "per_family_instance_index": 0, + "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "run_type": "iteration", + 
"repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2723, + "real_time": 2.5575247593566069e-01, + "cpu_time": 2.5573402240176246e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Dilate2D_Constant_Padding/1", + "family_index": 21, + "per_family_instance_index": 0, + "run_name": "OpenCV_Dilate2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 5067, + "real_time": 1.3828342947730279e-01, + "cpu_time": 1.3827946674560934e-01, + "time_unit": "ms" + } + ] +} diff --git a/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json new file mode 100644 index 00000000..bcbe8558 --- /dev/null +++ b/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json @@ -0,0 +1,348 @@ +{ + "context": { + "date": "2025-05-26T22:17:38+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./bin/image-processing-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [2.48291,11.0342,9.65625], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "Eigen_Convolve2D/1", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Eigen_Convolve2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 139, + "real_time": 4.9939627270046758e+00, + "cpu_time": 4.9937224388489208e+00, + "time_unit": "ms" + }, + { + "name": "MLIR_Conv2D/1", + 
"family_index": 1, + "per_family_instance_index": 0, + "run_name": "MLIR_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 94, + "real_time": 7.3809593598893350e+00, + "cpu_time": 7.3805415851063847e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Conv2D/1", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "Buddy_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1001, + "real_time": 7.1126366076948166e-01, + "cpu_time": 7.1122309190809208e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Corr2D_Constant_Padding/1", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "Buddy_Corr2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 645, + "real_time": 1.0820790201194526e+00, + "cpu_time": 1.0819991410852714e+00, + "time_unit": "ms" + }, + { + "name": "OpenCV_Filter2D_Constant_Padding/1", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "OpenCV_Filter2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 367, + "real_time": 1.9048442226664573e+00, + "cpu_time": 1.9047783051771108e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 5, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4717, + "real_time": 1.4679838001146794e-01, + "cpu_time": 1.4678647275810888e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, 
+ "threads": 1, + "iterations": 2633, + "real_time": 2.6625526897068763e-01, + "cpu_time": 2.6624255070262071e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 7, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 103033, + "real_time": 6.7974476629531662e-03, + "cpu_time": 6.7971674414993247e-03, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "family_index": 8, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 48825, + "real_time": 1.4324477137935753e-02, + "cpu_time": 1.4324028919610843e-02, + "time_unit": "ms" + }, + { + "name": "Buddy_Erosion2D_Constant_Padding/1", + "family_index": 9, + "per_family_instance_index": 0, + "run_name": "Buddy_Erosion2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3139, + "real_time": 2.2281337542475391e-01, + "cpu_time": 2.2280684963364145e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Dilation2D_Constant_Padding/1", + "family_index": 10, + "per_family_instance_index": 0, + "run_name": "Buddy_Dilation2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3065, + "real_time": 2.2091467243423274e-01, + "cpu_time": 2.2090241370309904e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Opening2D_Constant_Padding/1", + "family_index": 11, + "per_family_instance_index": 0, + "run_name": "Buddy_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1920, + "real_time": 3.6005476140417159e-01, + "cpu_time": 
3.6003958437500022e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Closing2D_Constant_Padding/1", + "family_index": 12, + "per_family_instance_index": 0, + "run_name": "Buddy_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1938, + "real_time": 3.6397507227765025e-01, + "cpu_time": 3.6396496800825567e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_TopHat2D_Constant_Padding/1", + "family_index": 13, + "per_family_instance_index": 0, + "run_name": "Buddy_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 725, + "real_time": 8.9656516138849585e-01, + "cpu_time": 8.9648728137931033e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_BottomHat2D_Constant_Padding/1", + "family_index": 14, + "per_family_instance_index": 0, + "run_name": "Buddy_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 739, + "real_time": 9.1901460866644191e-01, + "cpu_time": 9.1897194046008335e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Erode2D_Constant_Padding/1", + "family_index": 15, + "per_family_instance_index": 0, + "run_name": "OpenCV_Erode2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4653, + "real_time": 1.5406708547102105e-01, + "cpu_time": 1.5406045841392613e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Opening2D_Constant_Padding/1", + "family_index": 16, + "per_family_instance_index": 0, + "run_name": "OpenCV_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3211, + "real_time": 2.2334867362681299e-01, + "cpu_time": 2.2334181719090618e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Closing2D_Constant_Padding/1", + "family_index": 17, + 
"per_family_instance_index": 0, + "run_name": "OpenCV_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3106, + "real_time": 2.2087438381052141e-01, + "cpu_time": 2.2086412846104286e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_TopHat2D_Constant_Padding/1", + "family_index": 18, + "per_family_instance_index": 0, + "run_name": "OpenCV_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2694, + "real_time": 2.5705747035316123e-01, + "cpu_time": 2.5704664179658498e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_BottomHat2D_Constant_Padding/1", + "family_index": 19, + "per_family_instance_index": 0, + "run_name": "OpenCV_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2735, + "real_time": 2.5591517047755680e-01, + "cpu_time": 2.5590764716636138e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "family_index": 20, + "per_family_instance_index": 0, + "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2736, + "real_time": 2.5600879739054982e-01, + "cpu_time": 2.5599841739765994e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Dilate2D_Constant_Padding/1", + "family_index": 21, + "per_family_instance_index": 0, + "run_name": "OpenCV_Dilate2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4980, + "real_time": 1.4035605851665559e-01, + "cpu_time": 1.4035354417670684e-01, + "time_unit": "ms" + } + ] +} diff --git a/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json 
b/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json new file mode 100644 index 00000000..af0b0c54 --- /dev/null +++ b/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json @@ -0,0 +1,348 @@ +{ + "context": { + "date": "2025-05-26T22:18:02+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./bin/image-processing-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [2.4873,10.48,9.50293], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "Eigen_Convolve2D/1", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Eigen_Convolve2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 140, + "real_time": 5.0121187365480830e+00, + "cpu_time": 5.0116892428571438e+00, + "time_unit": "ms" + }, + { + "name": "MLIR_Conv2D/1", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "MLIR_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 95, + "real_time": 7.3891957535555486e+00, + "cpu_time": 7.3886794842105266e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Conv2D/1", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "Buddy_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 962, + "real_time": 7.0211241165032756e-01, + "cpu_time": 7.0208425571725575e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Corr2D_Constant_Padding/1", + 
"family_index": 3, + "per_family_instance_index": 0, + "run_name": "Buddy_Corr2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 652, + "real_time": 1.0686190941719556e+00, + "cpu_time": 1.0686101825153378e+00, + "time_unit": "ms" + }, + { + "name": "OpenCV_Filter2D_Constant_Padding/1", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "OpenCV_Filter2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 366, + "real_time": 1.9082594459336963e+00, + "cpu_time": 1.9081814426229509e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 5, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4698, + "real_time": 1.4621149407594242e-01, + "cpu_time": 1.4620496956151557e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2627, + "real_time": 2.6652376445064901e-01, + "cpu_time": 2.6651557099352852e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 7, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 102351, + "real_time": 6.8222627678054546e-03, + "cpu_time": 6.8219908061474717e-03, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "family_index": 8, + "per_family_instance_index": 0, + "run_name": 
"OpenCV_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 48814, + "real_time": 1.4326468274316613e-02, + "cpu_time": 1.4325946429303082e-02, + "time_unit": "ms" + }, + { + "name": "Buddy_Erosion2D_Constant_Padding/1", + "family_index": 9, + "per_family_instance_index": 0, + "run_name": "Buddy_Erosion2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3146, + "real_time": 2.2161174837904832e-01, + "cpu_time": 2.2160283598219940e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Dilation2D_Constant_Padding/1", + "family_index": 10, + "per_family_instance_index": 0, + "run_name": "Buddy_Dilation2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3150, + "real_time": 2.2120316409402424e-01, + "cpu_time": 2.2118643746031705e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Opening2D_Constant_Padding/1", + "family_index": 11, + "per_family_instance_index": 0, + "run_name": "Buddy_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2128, + "real_time": 3.2950831661210922e-01, + "cpu_time": 3.2949963768797014e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Closing2D_Constant_Padding/1", + "family_index": 12, + "per_family_instance_index": 0, + "run_name": "Buddy_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2167, + "real_time": 3.3219173261796925e-01, + "cpu_time": 3.3217791832025839e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_TopHat2D_Constant_Padding/1", + "family_index": 13, + "per_family_instance_index": 0, + "run_name": "Buddy_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + 
"iterations": 747, + "real_time": 8.6567999977104793e-01, + "cpu_time": 8.6563935742971920e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_BottomHat2D_Constant_Padding/1", + "family_index": 14, + "per_family_instance_index": 0, + "run_name": "Buddy_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 769, + "real_time": 8.7533015346031040e-01, + "cpu_time": 8.7531097139141834e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Erode2D_Constant_Padding/1", + "family_index": 15, + "per_family_instance_index": 0, + "run_name": "OpenCV_Erode2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 5009, + "real_time": 1.3942049210491378e-01, + "cpu_time": 1.3941283669395094e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Opening2D_Constant_Padding/1", + "family_index": 16, + "per_family_instance_index": 0, + "run_name": "OpenCV_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3227, + "real_time": 2.1610022523952113e-01, + "cpu_time": 2.1609240285094464e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Closing2D_Constant_Padding/1", + "family_index": 17, + "per_family_instance_index": 0, + "run_name": "OpenCV_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3120, + "real_time": 2.2463623064164168e-01, + "cpu_time": 2.2463104391025654e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_TopHat2D_Constant_Padding/1", + "family_index": 18, + "per_family_instance_index": 0, + "run_name": "OpenCV_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2745, + "real_time": 2.5778625547234479e-01, + "cpu_time": 2.5777660327868818e-01, + "time_unit": "ms" + }, + { + "name": 
"OpenCV_BottomHat2D_Constant_Padding/1", + "family_index": 19, + "per_family_instance_index": 0, + "run_name": "OpenCV_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2765, + "real_time": 2.5453510783464522e-01, + "cpu_time": 2.5452241663652830e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "family_index": 20, + "per_family_instance_index": 0, + "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2733, + "real_time": 2.5569065993646978e-01, + "cpu_time": 2.5567140065861649e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Dilate2D_Constant_Padding/1", + "family_index": 21, + "per_family_instance_index": 0, + "run_name": "OpenCV_Dilate2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4956, + "real_time": 1.4090421929041450e-01, + "cpu_time": 1.4089905790960416e-01, + "time_unit": "ms" + } + ] +} diff --git a/test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json new file mode 100644 index 00000000..67bbc523 --- /dev/null +++ b/test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json @@ -0,0 +1,348 @@ +{ + "context": { + "date": "2025-05-26T22:18:25+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./bin/image-processing-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": 
"Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [2.2627,9.78223,9.29688], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "Eigen_Convolve2D/1", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Eigen_Convolve2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 60, + "real_time": 1.1918221662441889e+01, + "cpu_time": 1.1917878933333334e+01, + "time_unit": "ms" + }, + { + "name": "MLIR_Conv2D/1", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "MLIR_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 23, + "real_time": 2.9796444284527198e+01, + "cpu_time": 2.9795302565217398e+01, + "time_unit": "ms" + }, + { + "name": "Buddy_Conv2D/1", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "Buddy_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 332, + "real_time": 2.0893121373850896e+00, + "cpu_time": 2.0892674909638549e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Corr2D_Constant_Padding/1", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "Buddy_Corr2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 390, + "real_time": 1.7985034925051224e+00, + "cpu_time": 1.7984618025641028e+00, + "time_unit": "ms" + }, + { + "name": "OpenCV_Filter2D_Constant_Padding/1", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "OpenCV_Filter2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 256, + "real_time": 2.7374664350645617e+00, + "cpu_time": 2.7373652890625015e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 5, + 
"per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4768, + "real_time": 1.4551469881162549e-01, + "cpu_time": 1.4550845364932896e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2629, + "real_time": 2.6624438559410463e-01, + "cpu_time": 2.6623375123621162e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 7, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 103262, + "real_time": 6.7758366610738774e-03, + "cpu_time": 6.7757091766574339e-03, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "family_index": 8, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 48878, + "real_time": 1.4318795230307766e-02, + "cpu_time": 1.4318104157289568e-02, + "time_unit": "ms" + }, + { + "name": "Buddy_Erosion2D_Constant_Padding/1", + "family_index": 9, + "per_family_instance_index": 0, + "run_name": "Buddy_Erosion2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3105, + "real_time": 2.2355384296841091e-01, + "cpu_time": 2.2354234396135308e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Dilation2D_Constant_Padding/1", + "family_index": 10, + "per_family_instance_index": 0, + "run_name": 
"Buddy_Dilation2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3120, + "real_time": 2.2285880389599463e-01, + "cpu_time": 2.2284695673076915e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Opening2D_Constant_Padding/1", + "family_index": 11, + "per_family_instance_index": 0, + "run_name": "Buddy_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1986, + "real_time": 3.5211119298098192e-01, + "cpu_time": 3.5210273917421908e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Closing2D_Constant_Padding/1", + "family_index": 12, + "per_family_instance_index": 0, + "run_name": "Buddy_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1944, + "real_time": 3.4801221027234452e-01, + "cpu_time": 3.4799811882716053e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_TopHat2D_Constant_Padding/1", + "family_index": 13, + "per_family_instance_index": 0, + "run_name": "Buddy_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 747, + "real_time": 9.1425062392849521e-01, + "cpu_time": 9.1420195046853980e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_BottomHat2D_Constant_Padding/1", + "family_index": 14, + "per_family_instance_index": 0, + "run_name": "Buddy_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 739, + "real_time": 8.9897179071248301e-01, + "cpu_time": 8.9893609201623792e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Erode2D_Constant_Padding/1", + "family_index": 15, + "per_family_instance_index": 0, + "run_name": "OpenCV_Erode2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + 
"iterations": 4963, + "real_time": 1.4065765219723869e-01, + "cpu_time": 1.4065358674189021e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Opening2D_Constant_Padding/1", + "family_index": 16, + "per_family_instance_index": 0, + "run_name": "OpenCV_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3142, + "real_time": 2.2345523745352716e-01, + "cpu_time": 2.2343788860598351e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Closing2D_Constant_Padding/1", + "family_index": 17, + "per_family_instance_index": 0, + "run_name": "OpenCV_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3089, + "real_time": 2.2617580448106950e-01, + "cpu_time": 2.2616760051796744e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_TopHat2D_Constant_Padding/1", + "family_index": 18, + "per_family_instance_index": 0, + "run_name": "OpenCV_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2689, + "real_time": 2.6113948967985257e-01, + "cpu_time": 2.6113230011156607e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_BottomHat2D_Constant_Padding/1", + "family_index": 19, + "per_family_instance_index": 0, + "run_name": "OpenCV_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2693, + "real_time": 2.6151641644468282e-01, + "cpu_time": 2.6150498106201342e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "family_index": 20, + "per_family_instance_index": 0, + "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2664, + "real_time": 2.6288690386681229e-01, + "cpu_time": 2.6287323873873880e-01, + "time_unit": "ms" + }, + 
{ + "name": "OpenCV_Dilate2D_Constant_Padding/1", + "family_index": 21, + "per_family_instance_index": 0, + "run_name": "OpenCV_Dilate2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 5045, + "real_time": 1.3856769339121486e-01, + "cpu_time": 1.3856448741328012e-01, + "time_unit": "ms" + } + ] +} diff --git a/test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json new file mode 100644 index 00000000..beb2ef45 --- /dev/null +++ b/test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json @@ -0,0 +1,348 @@ +{ + "context": { + "date": "2025-05-26T22:18:50+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./bin/image-processing-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [2.17188,9.15625,9.10205], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "Eigen_Convolve2D/1", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Eigen_Convolve2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 60, + "real_time": 1.1642454595615467e+01, + "cpu_time": 1.1642006716666668e+01, + "time_unit": "ms" + }, + { + "name": "MLIR_Conv2D/1", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "MLIR_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 23, + 
"real_time": 2.9861606090613034e+01, + "cpu_time": 2.9859480782608685e+01, + "time_unit": "ms" + }, + { + "name": "Buddy_Conv2D/1", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "Buddy_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 327, + "real_time": 2.1290786120049452e+00, + "cpu_time": 2.1289165015290519e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Corr2D_Constant_Padding/1", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "Buddy_Corr2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 389, + "real_time": 1.8013706837797532e+00, + "cpu_time": 1.8012753650385598e+00, + "time_unit": "ms" + }, + { + "name": "OpenCV_Filter2D_Constant_Padding/1", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "OpenCV_Filter2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 255, + "real_time": 2.7427997367054808e+00, + "cpu_time": 2.7426879019607848e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 5, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4755, + "real_time": 1.4582137870049000e-01, + "cpu_time": 1.4581456950578325e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2633, + "real_time": 2.6608296191447867e-01, + "cpu_time": 2.6606331978731484e-01, + "time_unit": "ms" + }, + { + "name": 
"OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 7, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 102709, + "real_time": 6.8135985735357394e-03, + "cpu_time": 6.8133925848757161e-03, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "family_index": 8, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 48854, + "real_time": 1.4308495751672484e-02, + "cpu_time": 1.4308075367421284e-02, + "time_unit": "ms" + }, + { + "name": "Buddy_Erosion2D_Constant_Padding/1", + "family_index": 9, + "per_family_instance_index": 0, + "run_name": "Buddy_Erosion2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2456, + "real_time": 2.3071197049649803e-01, + "cpu_time": 2.3070322231270354e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Dilation2D_Constant_Padding/1", + "family_index": 10, + "per_family_instance_index": 0, + "run_name": "Buddy_Dilation2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3097, + "real_time": 2.2570516530413531e-01, + "cpu_time": 2.2569759476913098e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Opening2D_Constant_Padding/1", + "family_index": 11, + "per_family_instance_index": 0, + "run_name": "Buddy_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2015, + "real_time": 3.5169201050295723e-01, + "cpu_time": 3.5164615136476429e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Closing2D_Constant_Padding/1", + "family_index": 12, + "per_family_instance_index": 0, + 
"run_name": "Buddy_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2002, + "real_time": 3.4624248832374899e-01, + "cpu_time": 3.4622674325674280e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_TopHat2D_Constant_Padding/1", + "family_index": 13, + "per_family_instance_index": 0, + "run_name": "Buddy_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 721, + "real_time": 9.2392744452546605e-01, + "cpu_time": 9.2383990707350849e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_BottomHat2D_Constant_Padding/1", + "family_index": 14, + "per_family_instance_index": 0, + "run_name": "Buddy_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 727, + "real_time": 9.2287308074011276e-01, + "cpu_time": 9.2279450894085369e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Erode2D_Constant_Padding/1", + "family_index": 15, + "per_family_instance_index": 0, + "run_name": "OpenCV_Erode2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4988, + "real_time": 1.4017799080505597e-01, + "cpu_time": 1.4017390336808308e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Opening2D_Constant_Padding/1", + "family_index": 16, + "per_family_instance_index": 0, + "run_name": "OpenCV_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3142, + "real_time": 2.2201454960506939e-01, + "cpu_time": 2.2200861521323975e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Closing2D_Constant_Padding/1", + "family_index": 17, + "per_family_instance_index": 0, + "run_name": "OpenCV_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, 
+ "iterations": 3180, + "real_time": 2.2126464934656456e-01, + "cpu_time": 2.2125921069182428e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_TopHat2D_Constant_Padding/1", + "family_index": 18, + "per_family_instance_index": 0, + "run_name": "OpenCV_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2711, + "real_time": 2.6487032235306335e-01, + "cpu_time": 2.6486294909627450e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_BottomHat2D_Constant_Padding/1", + "family_index": 19, + "per_family_instance_index": 0, + "run_name": "OpenCV_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2642, + "real_time": 2.6148257186293694e-01, + "cpu_time": 2.6147949810749349e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "family_index": 20, + "per_family_instance_index": 0, + "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2712, + "real_time": 2.5861562781893044e-01, + "cpu_time": 2.5860544026548782e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Dilate2D_Constant_Padding/1", + "family_index": 21, + "per_family_instance_index": 0, + "run_name": "OpenCV_Dilate2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 5064, + "real_time": 1.3794987734648478e-01, + "cpu_time": 1.3794665541074216e-01, + "time_unit": "ms" + } + ] +} diff --git a/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json index e9cc3462..a47199eb 100644 --- a/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json +++ 
b/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-05-26T22:07:48+00:00", + "date": "2025-05-26T22:13:36+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [2.08887,1.48682,3.5083], + "load_avg": [30.7622,22.5166,11.9751], "library_build_type": "release" }, "benchmarks": [ @@ -46,8 +46,8 @@ "repetition_index": 0, "threads": 1, "iterations": 140, - "real_time": 4.9964816575603823e+00, - "cpu_time": 4.9958225071428561e+00, + "real_time": 4.9810536737952917e+00, + "cpu_time": 4.9810131785714287e+00, "time_unit": "ms" }, { @@ -60,8 +60,8 @@ "repetition_index": 0, "threads": 1, "iterations": 95, - "real_time": 7.3868563496752788e+00, - "cpu_time": 7.3858671157894733e+00, + "real_time": 7.3597828220379977e+00, + "cpu_time": 7.3595218842105261e+00, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1443, - "real_time": 5.0951387766782230e-01, - "cpu_time": 5.0944817117117125e-01, + "iterations": 1585, + "real_time": 4.3800919777963437e-01, + "cpu_time": 4.3799472555205066e-01, "time_unit": "ms" }, { @@ -87,9 +87,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 868, - "real_time": 8.0464044285397374e-01, - "cpu_time": 8.0453992165898691e-01, + "iterations": 879, + "real_time": 7.9641059736071729e-01, + "cpu_time": 7.9638082935153565e-01, "time_unit": "ms" }, { @@ -101,9 +101,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 547, - "real_time": 1.2804496032228418e+00, - "cpu_time": 1.2802703528336392e+00, + "iterations": 549, + "real_time": 1.2760020127496217e+00, + "cpu_time": 1.2759357449908932e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4810, - "real_time": 1.4675747014566667e-01, - 
"cpu_time": 1.4673347920997926e-01, + "iterations": 4708, + "real_time": 1.4899489521258649e-01, + "cpu_time": 1.4898712000849618e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2572, - "real_time": 2.7049120086857914e-01, - "cpu_time": 2.7039851360808687e-01, + "iterations": 2618, + "real_time": 2.6733481813993409e-01, + "cpu_time": 2.6732274637127584e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 98744, - "real_time": 6.8054141457585182e-03, - "cpu_time": 6.8044771530422123e-03, + "iterations": 102964, + "real_time": 6.7923858877275613e-03, + "cpu_time": 6.7922180956450830e-03, "time_unit": "ms" }, { @@ -157,9 +157,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 46694, - "real_time": 1.4306057277887686e-02, - "cpu_time": 1.4305728487600131e-02, + "iterations": 48907, + "real_time": 1.4310074157344543e-02, + "cpu_time": 1.4309516122436467e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3144, - "real_time": 2.2463264469882005e-01, - "cpu_time": 2.2462909382951662e-01, + "iterations": 3019, + "real_time": 2.3269172327215998e-01, + "cpu_time": 2.3268162702881737e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3072, - "real_time": 2.2357795508772446e-01, - "cpu_time": 2.2354433854166664e-01, + "iterations": 3004, + "real_time": 2.3115333293709711e-01, + "cpu_time": 2.3114786617842836e-01, "time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1655, - "real_time": 4.0703193253620873e-01, - "cpu_time": 4.0702098006042287e-01, + "iterations": 2064, + "real_time": 3.3480858939244995e-01, + "cpu_time": 3.3479587936046529e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, 
"threads": 1, - "iterations": 1766, - "real_time": 3.7941525744048549e-01, - "cpu_time": 3.7936569309173185e-01, + "iterations": 2044, + "real_time": 3.3931921504131735e-01, + "cpu_time": 3.3930748091976470e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 721, - "real_time": 8.9330576236221559e-01, - "cpu_time": 8.9318446185852929e-01, + "iterations": 782, + "real_time": 8.5598471171944346e-01, + "cpu_time": 8.5594456010230346e-01, "time_unit": "ms" }, { @@ -241,9 +241,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 724, - "real_time": 9.3049459282864522e-01, - "cpu_time": 9.3035631767955873e-01, + "iterations": 791, + "real_time": 8.5073583035267109e-01, + "cpu_time": 8.5071446396965955e-01, "time_unit": "ms" }, { @@ -255,9 +255,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5029, - "real_time": 1.3920862829917513e-01, - "cpu_time": 1.3919029707695360e-01, + "iterations": 5012, + "real_time": 1.3971026928861999e-01, + "cpu_time": 1.3970263727055060e-01, "time_unit": "ms" }, { @@ -269,9 +269,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3135, - "real_time": 2.2619957939099278e-01, - "cpu_time": 2.2619461626794310e-01, + "iterations": 3259, + "real_time": 2.1476675100434811e-01, + "cpu_time": 2.1475941423749620e-01, "time_unit": "ms" }, { @@ -283,9 +283,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3062, - "real_time": 2.2434361965184427e-01, - "cpu_time": 2.2434184650555231e-01, + "iterations": 3254, + "real_time": 2.1490678431600169e-01, + "cpu_time": 2.1489986662569091e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2642, - "real_time": 2.6697811249708425e-01, - "cpu_time": 2.6697067903103733e-01, + "iterations": 2816, + "real_time": 2.4862459527370942e-01, + "cpu_time": 2.4861500248579568e-01, "time_unit": 
"ms" }, { @@ -311,9 +311,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2674, - "real_time": 2.6228858078582662e-01, - "cpu_time": 2.6228528160059844e-01, + "iterations": 2807, + "real_time": 2.4944172902042686e-01, + "cpu_time": 2.4942691806198783e-01, "time_unit": "ms" }, { @@ -325,9 +325,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2688, - "real_time": 2.6034137824483750e-01, - "cpu_time": 2.6033733519345176e-01, + "iterations": 2604, + "real_time": 2.6835452481966965e-01, + "cpu_time": 2.6834275153609888e-01, "time_unit": "ms" }, { @@ -339,9 +339,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4937, - "real_time": 1.4531036511784939e-01, - "cpu_time": 1.4529212335426331e-01, + "iterations": 4985, + "real_time": 1.4054916066714490e-01, + "cpu_time": 1.4054059458375109e-01, "time_unit": "ms" } ] diff --git a/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json index a1c3c244..54028421 100644 --- a/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json +++ b/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-05-26T22:08:12+00:00", + "date": "2025-05-26T22:14:00+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [2.0625,1.521,3.4751], + "load_avg": [20.9478,20.8682,11.709], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 137, - "real_time": 5.1337036229398132e+00, - "cpu_time": 5.1329572627737230e+00, + "iterations": 144, + "real_time": 4.8620806774124503e+00, + "cpu_time": 
4.8617971249999998e+00, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 92, - "real_time": 7.5545981728836242e+00, - "cpu_time": 7.5545316195652177e+00, + "iterations": 95, + "real_time": 7.3647483399039819e+00, + "cpu_time": 7.3645103473684204e+00, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1241, - "real_time": 5.9495814967117033e-01, - "cpu_time": 5.9494254391619683e-01, + "iterations": 1587, + "real_time": 4.4364659274120188e-01, + "cpu_time": 4.4360596030245769e-01, "time_unit": "ms" }, { @@ -87,9 +87,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 864, - "real_time": 8.1512168549967035e-01, - "cpu_time": 8.1501009375000022e-01, + "iterations": 850, + "real_time": 7.9619941904264335e-01, + "cpu_time": 7.9616357647058844e-01, "time_unit": "ms" }, { @@ -101,9 +101,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 548, - "real_time": 1.2782183135893659e+00, - "cpu_time": 1.2780402226277368e+00, + "iterations": 549, + "real_time": 1.2764639555322235e+00, + "cpu_time": 1.2764428561020034e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4723, - "real_time": 1.4875183663213251e-01, - "cpu_time": 1.4873103324158377e-01, + "iterations": 4759, + "real_time": 1.4590555271648464e-01, + "cpu_time": 1.4590127484765700e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2619, - "real_time": 2.6808699088703819e-01, - "cpu_time": 2.6804946315387551e-01, + "iterations": 2628, + "real_time": 2.6687351161758649e-01, + "cpu_time": 2.6686138660578385e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 103122, - "real_time": 6.7839405446529655e-03, - "cpu_time": 6.7830443067434720e-03, + 
"iterations": 102952, + "real_time": 6.7868454739183929e-03, + "cpu_time": 6.7866766648535231e-03, "time_unit": "ms" }, { @@ -157,8 +157,192 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 46927, - "real_time": 1.4897604358085247e-02, - "cpu_time": 1.4895712084727326e-02, + "iterations": 48945, + "real_time": 1.4264975000005026e-02, + "cpu_time": 1.4264539462662167e-02, "time_unit": "ms" - } \ No newline at end of file + }, + { + "name": "Buddy_Erosion2D_Constant_Padding/1", + "family_index": 9, + "per_family_instance_index": 0, + "run_name": "Buddy_Erosion2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3245, + "real_time": 2.2328324638915539e-01, + "cpu_time": 2.2327703112480715e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Dilation2D_Constant_Padding/1", + "family_index": 10, + "per_family_instance_index": 0, + "run_name": "Buddy_Dilation2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2941, + "real_time": 2.3091911119451655e-01, + "cpu_time": 2.3090739714382824e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Opening2D_Constant_Padding/1", + "family_index": 11, + "per_family_instance_index": 0, + "run_name": "Buddy_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1920, + "real_time": 3.6398599525758374e-01, + "cpu_time": 3.6397573020833335e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Closing2D_Constant_Padding/1", + "family_index": 12, + "per_family_instance_index": 0, + "run_name": "Buddy_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1942, + "real_time": 3.6292056717078303e-01, + "cpu_time": 3.6289218640576670e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_TopHat2D_Constant_Padding/1", + 
"family_index": 13, + "per_family_instance_index": 0, + "run_name": "Buddy_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 686, + "real_time": 9.5260923092983907e-01, + "cpu_time": 9.5257817346938856e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_BottomHat2D_Constant_Padding/1", + "family_index": 14, + "per_family_instance_index": 0, + "run_name": "Buddy_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 687, + "real_time": 1.0497556183232317e+00, + "cpu_time": 1.0497079097525470e+00, + "time_unit": "ms" + }, + { + "name": "OpenCV_Erode2D_Constant_Padding/1", + "family_index": 15, + "per_family_instance_index": 0, + "run_name": "OpenCV_Erode2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4813, + "real_time": 1.4027526283645592e-01, + "cpu_time": 1.4027346166632040e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Opening2D_Constant_Padding/1", + "family_index": 16, + "per_family_instance_index": 0, + "run_name": "OpenCV_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3185, + "real_time": 2.2067454829227232e-01, + "cpu_time": 2.2066997394034502e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Closing2D_Constant_Padding/1", + "family_index": 17, + "per_family_instance_index": 0, + "run_name": "OpenCV_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3185, + "real_time": 2.2132610858328863e-01, + "cpu_time": 2.2132252841444267e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_TopHat2D_Constant_Padding/1", + "family_index": 18, + "per_family_instance_index": 0, + "run_name": "OpenCV_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + 
"repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2747, + "real_time": 2.5434105026101389e-01, + "cpu_time": 2.5433080050964629e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_BottomHat2D_Constant_Padding/1", + "family_index": 19, + "per_family_instance_index": 0, + "run_name": "OpenCV_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2760, + "real_time": 2.5351542601550836e-01, + "cpu_time": 2.5350927717391269e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "family_index": 20, + "per_family_instance_index": 0, + "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2735, + "real_time": 2.5544877979611563e-01, + "cpu_time": 2.5544034881170063e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Dilate2D_Constant_Padding/1", + "family_index": 21, + "per_family_instance_index": 0, + "run_name": "OpenCV_Dilate2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4913, + "real_time": 1.4221790321921954e-01, + "cpu_time": 1.4221190637085293e-01, + "time_unit": "ms" + } + ] +} diff --git a/test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json new file mode 100644 index 00000000..7c0b9df1 --- /dev/null +++ b/test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json @@ -0,0 +1,348 @@ +{ + "context": { + "date": "2025-05-26T22:14:25+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./bin/image-processing-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 
49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [14.4819,19.3521,11.4497], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "Eigen_Convolve2D/1", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Eigen_Convolve2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 143, + "real_time": 4.8729841291279223e+00, + "cpu_time": 4.8728229930069924e+00, + "time_unit": "ms" + }, + { + "name": "MLIR_Conv2D/1", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "MLIR_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 95, + "real_time": 7.3783556489568012e+00, + "cpu_time": 7.3781441684210529e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Conv2D/1", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "Buddy_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1300, + "real_time": 5.2574137225747108e-01, + "cpu_time": 5.2571169923076932e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Corr2D_Constant_Padding/1", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "Buddy_Corr2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 860, + "real_time": 8.1764264465417968e-01, + "cpu_time": 8.1761070348837184e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Filter2D_Constant_Padding/1", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "OpenCV_Filter2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + 
"iterations": 548, + "real_time": 1.2776352410768941e+00, + "cpu_time": 1.2775980602189785e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 5, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4746, + "real_time": 1.4604094214156566e-01, + "cpu_time": 1.4603584492203947e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2630, + "real_time": 2.6615352260069247e-01, + "cpu_time": 2.6613684258555137e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 7, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 102577, + "real_time": 6.8162450673133279e-03, + "cpu_time": 6.8160773175273215e-03, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "family_index": 8, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 48983, + "real_time": 1.4289135157354995e-02, + "cpu_time": 1.4288656635975739e-02, + "time_unit": "ms" + }, + { + "name": "Buddy_Erosion2D_Constant_Padding/1", + "family_index": 9, + "per_family_instance_index": 0, + "run_name": "Buddy_Erosion2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3160, + "real_time": 2.2105318258363235e-01, + 
"cpu_time": 2.2104365632911407e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Dilation2D_Constant_Padding/1", + "family_index": 10, + "per_family_instance_index": 0, + "run_name": "Buddy_Dilation2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3155, + "real_time": 2.2091350375000157e-01, + "cpu_time": 2.2090452329635513e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Opening2D_Constant_Padding/1", + "family_index": 11, + "per_family_instance_index": 0, + "run_name": "Buddy_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2114, + "real_time": 4.0858148446512765e-01, + "cpu_time": 4.0855447114474897e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Closing2D_Constant_Padding/1", + "family_index": 12, + "per_family_instance_index": 0, + "run_name": "Buddy_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1704, + "real_time": 4.0029905443495151e-01, + "cpu_time": 4.0026930692488327e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_TopHat2D_Constant_Padding/1", + "family_index": 13, + "per_family_instance_index": 0, + "run_name": "Buddy_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 677, + "real_time": 9.8041003429643159e-01, + "cpu_time": 9.8036545494830007e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_BottomHat2D_Constant_Padding/1", + "family_index": 14, + "per_family_instance_index": 0, + "run_name": "Buddy_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 684, + "real_time": 9.7214487757076296e-01, + "cpu_time": 9.7207104093567231e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Erode2D_Constant_Padding/1", + "family_index": 15, + 
"per_family_instance_index": 0, + "run_name": "OpenCV_Erode2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 5055, + "real_time": 1.3827483485702474e-01, + "cpu_time": 1.3826953273986148e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Opening2D_Constant_Padding/1", + "family_index": 16, + "per_family_instance_index": 0, + "run_name": "OpenCV_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3286, + "real_time": 2.1324022398151715e-01, + "cpu_time": 2.1323121302495490e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Closing2D_Constant_Padding/1", + "family_index": 17, + "per_family_instance_index": 0, + "run_name": "OpenCV_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3307, + "real_time": 2.1567261159095585e-01, + "cpu_time": 2.1566540973692216e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_TopHat2D_Constant_Padding/1", + "family_index": 18, + "per_family_instance_index": 0, + "run_name": "OpenCV_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2840, + "real_time": 2.4255320141223116e-01, + "cpu_time": 2.4253837957746441e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_BottomHat2D_Constant_Padding/1", + "family_index": 19, + "per_family_instance_index": 0, + "run_name": "OpenCV_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2838, + "real_time": 2.4312600713955676e-01, + "cpu_time": 2.4311954404510186e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "family_index": 20, + "per_family_instance_index": 0, + "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "run_type": "iteration", + 
"repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2741, + "real_time": 2.5526782384278868e-01, + "cpu_time": 2.5525941590660284e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Dilate2D_Constant_Padding/1", + "family_index": 21, + "per_family_instance_index": 0, + "run_name": "OpenCV_Dilate2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 5019, + "real_time": 1.3948601532692595e-01, + "cpu_time": 1.3948021837019295e-01, + "time_unit": "ms" + } + ] +} diff --git a/test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json new file mode 100644 index 00000000..ca86c2dd --- /dev/null +++ b/test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json @@ -0,0 +1,348 @@ +{ + "context": { + "date": "2025-05-26T22:14:49+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./bin/image-processing-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [10.2954,17.9736,11.2031], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "Eigen_Convolve2D/1", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Eigen_Convolve2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 143, + "real_time": 4.8654593683622931e+00, + "cpu_time": 4.8652642377622382e+00, + "time_unit": "ms" + }, + { + "name": "MLIR_Conv2D/1", + 
"family_index": 1, + "per_family_instance_index": 0, + "run_name": "MLIR_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 95, + "real_time": 7.3815446935201949e+00, + "cpu_time": 7.3812387052631561e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Conv2D/1", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "Buddy_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1375, + "real_time": 5.2700068327513605e-01, + "cpu_time": 5.2696759345454547e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Corr2D_Constant_Padding/1", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "Buddy_Corr2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 857, + "real_time": 8.1594105127464234e-01, + "cpu_time": 8.1588588098016346e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Filter2D_Constant_Padding/1", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "OpenCV_Filter2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 548, + "real_time": 1.2793068257398414e+00, + "cpu_time": 1.2792433850364955e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 5, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4712, + "real_time": 1.4604800357048500e-01, + "cpu_time": 1.4604121286078095e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, 
+ "threads": 1, + "iterations": 2626, + "real_time": 2.6662886477524511e-01, + "cpu_time": 2.6661615993907073e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 7, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 103057, + "real_time": 6.7822204399716401e-03, + "cpu_time": 6.7820833325247156e-03, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "family_index": 8, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 48615, + "real_time": 1.4394099605643772e-02, + "cpu_time": 1.4393704720765189e-02, + "time_unit": "ms" + }, + { + "name": "Buddy_Erosion2D_Constant_Padding/1", + "family_index": 9, + "per_family_instance_index": 0, + "run_name": "Buddy_Erosion2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3174, + "real_time": 2.2084131912007299e-01, + "cpu_time": 2.2083239823566458e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Dilation2D_Constant_Padding/1", + "family_index": 10, + "per_family_instance_index": 0, + "run_name": "Buddy_Dilation2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3168, + "real_time": 2.2007557951534787e-01, + "cpu_time": 2.2006410353535388e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Opening2D_Constant_Padding/1", + "family_index": 11, + "per_family_instance_index": 0, + "run_name": "Buddy_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2081, + "real_time": 3.2926392900376639e-01, + "cpu_time": 
3.2925050552618995e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Closing2D_Constant_Padding/1", + "family_index": 12, + "per_family_instance_index": 0, + "run_name": "Buddy_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2123, + "real_time": 3.2805886033622378e-01, + "cpu_time": 3.2802363306641535e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_TopHat2D_Constant_Padding/1", + "family_index": 13, + "per_family_instance_index": 0, + "run_name": "Buddy_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 787, + "real_time": 8.8516553198821668e-01, + "cpu_time": 8.8512480304955576e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_BottomHat2D_Constant_Padding/1", + "family_index": 14, + "per_family_instance_index": 0, + "run_name": "Buddy_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 762, + "real_time": 8.6372281886695879e-01, + "cpu_time": 8.6365337926509200e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Erode2D_Constant_Padding/1", + "family_index": 15, + "per_family_instance_index": 0, + "run_name": "OpenCV_Erode2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 5037, + "real_time": 1.3910725835547316e-01, + "cpu_time": 1.3910415386142530e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Opening2D_Constant_Padding/1", + "family_index": 16, + "per_family_instance_index": 0, + "run_name": "OpenCV_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3118, + "real_time": 2.2313356545297972e-01, + "cpu_time": 2.2312385439384252e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Closing2D_Constant_Padding/1", + "family_index": 17, + 
"per_family_instance_index": 0, + "run_name": "OpenCV_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3165, + "real_time": 2.2287553685347994e-01, + "cpu_time": 2.2286821358609735e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_TopHat2D_Constant_Padding/1", + "family_index": 18, + "per_family_instance_index": 0, + "run_name": "OpenCV_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2680, + "real_time": 2.5539524547422110e-01, + "cpu_time": 2.5538522126865731e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_BottomHat2D_Constant_Padding/1", + "family_index": 19, + "per_family_instance_index": 0, + "run_name": "OpenCV_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2756, + "real_time": 2.5834521908632035e-01, + "cpu_time": 2.5833924056603741e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "family_index": 20, + "per_family_instance_index": 0, + "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2733, + "real_time": 2.8245730964181365e-01, + "cpu_time": 2.8244755031101304e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Dilate2D_Constant_Padding/1", + "family_index": 21, + "per_family_instance_index": 0, + "run_name": "OpenCV_Dilate2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4846, + "real_time": 1.4001663829600866e-01, + "cpu_time": 1.4001249938093291e-01, + "time_unit": "ms" + } + ] +} diff --git a/test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json 
b/test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json new file mode 100644 index 00000000..a8cd749b --- /dev/null +++ b/test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json @@ -0,0 +1,348 @@ +{ + "context": { + "date": "2025-05-26T22:15:13+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./bin/image-processing-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [7.93994,16.9375,11.0059], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "Eigen_Convolve2D/1", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Eigen_Convolve2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 59, + "real_time": 1.1848425839917134e+01, + "cpu_time": 1.1847907694915255e+01, + "time_unit": "ms" + }, + { + "name": "MLIR_Conv2D/1", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "MLIR_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 24, + "real_time": 2.9868476558476686e+01, + "cpu_time": 2.9867346333333341e+01, + "time_unit": "ms" + }, + { + "name": "Buddy_Conv2D/1", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "Buddy_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 225, + "real_time": 3.1161748617887497e+00, + "cpu_time": 3.1160428266666669e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Corr2D_Constant_Padding/1", + 
"family_index": 3, + "per_family_instance_index": 0, + "run_name": "Buddy_Corr2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 299, + "real_time": 2.3595800803955580e+00, + "cpu_time": 2.3594202575250853e+00, + "time_unit": "ms" + }, + { + "name": "OpenCV_Filter2D_Constant_Padding/1", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "OpenCV_Filter2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 167, + "real_time": 4.1960542631184987e+00, + "cpu_time": 4.1958419520958117e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 5, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4718, + "real_time": 1.4594548325409268e-01, + "cpu_time": 1.4594251165748195e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2624, + "real_time": 2.6661387244353024e-01, + "cpu_time": 2.6660572522865866e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 7, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 102278, + "real_time": 6.8319474596379914e-03, + "cpu_time": 6.8317058311660401e-03, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "family_index": 8, + "per_family_instance_index": 0, + "run_name": 
"OpenCV_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 47123, + "real_time": 1.4845184285984307e-02, + "cpu_time": 1.4844183286293328e-02, + "time_unit": "ms" + }, + { + "name": "Buddy_Erosion2D_Constant_Padding/1", + "family_index": 9, + "per_family_instance_index": 0, + "run_name": "Buddy_Erosion2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3147, + "real_time": 2.2381313694424959e-01, + "cpu_time": 2.2379139084842695e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Dilation2D_Constant_Padding/1", + "family_index": 10, + "per_family_instance_index": 0, + "run_name": "Buddy_Dilation2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3121, + "real_time": 2.2129215495836960e-01, + "cpu_time": 2.2128436654918285e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Opening2D_Constant_Padding/1", + "family_index": 11, + "per_family_instance_index": 0, + "run_name": "Buddy_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2029, + "real_time": 3.4534043006822945e-01, + "cpu_time": 3.4533251355347439e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Closing2D_Constant_Padding/1", + "family_index": 12, + "per_family_instance_index": 0, + "run_name": "Buddy_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2088, + "real_time": 3.3967797929899218e-01, + "cpu_time": 3.3966568295019134e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_TopHat2D_Constant_Padding/1", + "family_index": 13, + "per_family_instance_index": 0, + "run_name": "Buddy_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + 
"iterations": 755, + "real_time": 9.0232322498267847e-01, + "cpu_time": 9.0227498278145613e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_BottomHat2D_Constant_Padding/1", + "family_index": 14, + "per_family_instance_index": 0, + "run_name": "Buddy_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 740, + "real_time": 9.0346561140708026e-01, + "cpu_time": 9.0344697432432508e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Erode2D_Constant_Padding/1", + "family_index": 15, + "per_family_instance_index": 0, + "run_name": "OpenCV_Erode2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 5077, + "real_time": 1.3789620528557836e-01, + "cpu_time": 1.3789097912152867e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Opening2D_Constant_Padding/1", + "family_index": 16, + "per_family_instance_index": 0, + "run_name": "OpenCV_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3073, + "real_time": 2.2964896206897670e-01, + "cpu_time": 2.2963901692157565e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Closing2D_Constant_Padding/1", + "family_index": 17, + "per_family_instance_index": 0, + "run_name": "OpenCV_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3064, + "real_time": 2.2711840156689481e-01, + "cpu_time": 2.2711246866840784e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_TopHat2D_Constant_Padding/1", + "family_index": 18, + "per_family_instance_index": 0, + "run_name": "OpenCV_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2696, + "real_time": 2.6035577042210706e-01, + "cpu_time": 2.6034106676557928e-01, + "time_unit": "ms" + }, + { + "name": 
"OpenCV_BottomHat2D_Constant_Padding/1", + "family_index": 19, + "per_family_instance_index": 0, + "run_name": "OpenCV_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2699, + "real_time": 2.5962485761698106e-01, + "cpu_time": 2.5961401185624283e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "family_index": 20, + "per_family_instance_index": 0, + "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2689, + "real_time": 2.6031346023969376e-01, + "cpu_time": 2.6029616586091403e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Dilate2D_Constant_Padding/1", + "family_index": 21, + "per_family_instance_index": 0, + "run_name": "OpenCV_Dilate2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 5007, + "real_time": 1.3964356222516028e-01, + "cpu_time": 1.3963851967245872e-01, + "time_unit": "ms" + } + ] +} diff --git a/test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json new file mode 100644 index 00000000..ccd6e407 --- /dev/null +++ b/test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json @@ -0,0 +1,348 @@ +{ + "context": { + "date": "2025-05-26T22:15:37+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./bin/image-processing-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, 
+ { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [5.91162,15.7373,10.7656], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "Eigen_Convolve2D/1", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Eigen_Convolve2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 59, + "real_time": 1.1816273490756245e+01, + "cpu_time": 1.1815782491525423e+01, + "time_unit": "ms" + }, + { + "name": "MLIR_Conv2D/1", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "MLIR_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 23, + "real_time": 2.9844296529241230e+01, + "cpu_time": 2.9842673869565211e+01, + "time_unit": "ms" + }, + { + "name": "Buddy_Conv2D/1", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "Buddy_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 225, + "real_time": 3.1147927790880203e+00, + "cpu_time": 3.1147311333333345e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Corr2D_Constant_Padding/1", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "Buddy_Corr2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 297, + "real_time": 2.3587474267089408e+00, + "cpu_time": 2.3585573636363653e+00, + "time_unit": "ms" + }, + { + "name": "OpenCV_Filter2D_Constant_Padding/1", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "OpenCV_Filter2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 165, + "real_time": 4.2047196716973279e+00, + "cpu_time": 4.2045767636363633e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 
5, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4749, + "real_time": 1.4566486533892031e-01, + "cpu_time": 1.4565762518424935e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2628, + "real_time": 2.6631513128519241e-01, + "cpu_time": 2.6630184817351604e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 7, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 102987, + "real_time": 6.7866966045152712e-03, + "cpu_time": 6.7864467457057695e-03, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "family_index": 8, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 48924, + "real_time": 1.4303465876443516e-02, + "cpu_time": 1.4303076138500528e-02, + "time_unit": "ms" + }, + { + "name": "Buddy_Erosion2D_Constant_Padding/1", + "family_index": 9, + "per_family_instance_index": 0, + "run_name": "Buddy_Erosion2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3176, + "real_time": 2.2159935789234392e-01, + "cpu_time": 2.2158961429471016e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Dilation2D_Constant_Padding/1", + "family_index": 10, + "per_family_instance_index": 0, + "run_name": 
"Buddy_Dilation2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3152, + "real_time": 2.2156847473044838e-01, + "cpu_time": 2.2155957265228407e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Opening2D_Constant_Padding/1", + "family_index": 11, + "per_family_instance_index": 0, + "run_name": "Buddy_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1998, + "real_time": 3.5863460225236787e-01, + "cpu_time": 3.5861286786786817e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Closing2D_Constant_Padding/1", + "family_index": 12, + "per_family_instance_index": 0, + "run_name": "Buddy_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1943, + "real_time": 3.3647872074265117e-01, + "cpu_time": 3.3646851621204243e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_TopHat2D_Constant_Padding/1", + "family_index": 13, + "per_family_instance_index": 0, + "run_name": "Buddy_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 750, + "real_time": 9.0944948295752204e-01, + "cpu_time": 9.0935812666666738e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_BottomHat2D_Constant_Padding/1", + "family_index": 14, + "per_family_instance_index": 0, + "run_name": "Buddy_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 737, + "real_time": 9.1223105323201448e-01, + "cpu_time": 9.1217854274084309e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Erode2D_Constant_Padding/1", + "family_index": 15, + "per_family_instance_index": 0, + "run_name": "OpenCV_Erode2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + 
"iterations": 5030, + "real_time": 1.3899165307491959e-01, + "cpu_time": 1.3898452544731602e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Opening2D_Constant_Padding/1", + "family_index": 16, + "per_family_instance_index": 0, + "run_name": "OpenCV_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3017, + "real_time": 2.2983206278605298e-01, + "cpu_time": 2.2982459032151112e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Closing2D_Constant_Padding/1", + "family_index": 17, + "per_family_instance_index": 0, + "run_name": "OpenCV_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3170, + "real_time": 2.2272756176322039e-01, + "cpu_time": 2.2271838801261890e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_TopHat2D_Constant_Padding/1", + "family_index": 18, + "per_family_instance_index": 0, + "run_name": "OpenCV_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2657, + "real_time": 2.6656502677638771e-01, + "cpu_time": 2.6655630447873591e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_BottomHat2D_Constant_Padding/1", + "family_index": 19, + "per_family_instance_index": 0, + "run_name": "OpenCV_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2690, + "real_time": 2.6213287905578719e-01, + "cpu_time": 2.6212498327137529e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "family_index": 20, + "per_family_instance_index": 0, + "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2671, + "real_time": 2.6169095993131253e-01, + "cpu_time": 2.6167889779108955e-01, + "time_unit": "ms" + }, + 
{ + "name": "OpenCV_Dilate2D_Constant_Padding/1", + "family_index": 21, + "per_family_instance_index": 0, + "run_name": "OpenCV_Dilate2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4878, + "real_time": 1.4317088713468515e-01, + "cpu_time": 1.4316560619106178e-01, + "time_unit": "ms" + } + ] +} diff --git a/test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json new file mode 100644 index 00000000..d36004ad --- /dev/null +++ b/test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json @@ -0,0 +1,348 @@ +{ + "context": { + "date": "2025-05-26T22:16:02+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./bin/image-processing-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [4.57568,14.6328,10.5317], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "Eigen_Convolve2D/1", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Eigen_Convolve2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 32, + "real_time": 2.1973324648570269e+01, + "cpu_time": 2.1972224593749999e+01, + "time_unit": "ms" + }, + { + "name": "MLIR_Conv2D/1", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "MLIR_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 
10, + "real_time": 6.8125827610492706e+01, + "cpu_time": 6.8118218799999994e+01, + "time_unit": "ms" + }, + { + "name": "Buddy_Conv2D/1", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "Buddy_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 109, + "real_time": 6.3353857931193955e+00, + "cpu_time": 6.3351724220183510e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Corr2D_Constant_Padding/1", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "Buddy_Corr2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 148, + "real_time": 4.7007313224713547e+00, + "cpu_time": 4.7003578243243220e+00, + "time_unit": "ms" + }, + { + "name": "OpenCV_Filter2D_Constant_Padding/1", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "OpenCV_Filter2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 79, + "real_time": 8.8025724538896650e+00, + "cpu_time": 8.8023394810126590e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 5, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4771, + "real_time": 1.4587134018218684e-01, + "cpu_time": 1.4586479689792500e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2626, + "real_time": 2.6619548271697435e-01, + "cpu_time": 2.6618333244478276e-01, + "time_unit": "ms" + }, + { + "name": 
"OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 7, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 103087, + "real_time": 6.7907857617304848e-03, + "cpu_time": 6.7906139959451753e-03, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "family_index": 8, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 48675, + "real_time": 1.4325042925373248e-02, + "cpu_time": 1.4324671186440667e-02, + "time_unit": "ms" + }, + { + "name": "Buddy_Erosion2D_Constant_Padding/1", + "family_index": 9, + "per_family_instance_index": 0, + "run_name": "Buddy_Erosion2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3188, + "real_time": 2.1997994744456098e-01, + "cpu_time": 2.1996185319949818e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Dilation2D_Constant_Padding/1", + "family_index": 10, + "per_family_instance_index": 0, + "run_name": "Buddy_Dilation2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3182, + "real_time": 2.2068286009043886e-01, + "cpu_time": 2.2067045663104931e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Opening2D_Constant_Padding/1", + "family_index": 11, + "per_family_instance_index": 0, + "run_name": "Buddy_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2146, + "real_time": 3.3393757676450536e-01, + "cpu_time": 3.3392018080149133e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Closing2D_Constant_Padding/1", + "family_index": 12, + "per_family_instance_index": 0, + 
"run_name": "Buddy_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2142, + "real_time": 3.9424579399250914e-01, + "cpu_time": 3.9422578664799229e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_TopHat2D_Constant_Padding/1", + "family_index": 13, + "per_family_instance_index": 0, + "run_name": "Buddy_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 745, + "real_time": 9.0135914967364117e-01, + "cpu_time": 9.0132612617449748e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_BottomHat2D_Constant_Padding/1", + "family_index": 14, + "per_family_instance_index": 0, + "run_name": "Buddy_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 722, + "real_time": 9.1764490556106015e-01, + "cpu_time": 9.1758207894736721e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Erode2D_Constant_Padding/1", + "family_index": 15, + "per_family_instance_index": 0, + "run_name": "OpenCV_Erode2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 5046, + "real_time": 1.3835566303664334e-01, + "cpu_time": 1.3834791062227497e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Opening2D_Constant_Padding/1", + "family_index": 16, + "per_family_instance_index": 0, + "run_name": "OpenCV_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3129, + "real_time": 2.2442531306147767e-01, + "cpu_time": 2.2441539757110901e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Closing2D_Constant_Padding/1", + "family_index": 17, + "per_family_instance_index": 0, + "run_name": "OpenCV_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, 
+ "iterations": 3078, + "real_time": 2.2707124742364015e-01, + "cpu_time": 2.2706264035087806e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_TopHat2D_Constant_Padding/1", + "family_index": 18, + "per_family_instance_index": 0, + "run_name": "OpenCV_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2660, + "real_time": 2.6315047818803250e-01, + "cpu_time": 2.6313447218045083e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_BottomHat2D_Constant_Padding/1", + "family_index": 19, + "per_family_instance_index": 0, + "run_name": "OpenCV_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2723, + "real_time": 2.5931872657301491e-01, + "cpu_time": 2.5930012449504203e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "family_index": 20, + "per_family_instance_index": 0, + "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2685, + "real_time": 2.6018807744291683e-01, + "cpu_time": 2.6018054525139650e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Dilate2D_Constant_Padding/1", + "family_index": 21, + "per_family_instance_index": 0, + "run_name": "OpenCV_Dilate2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4978, + "real_time": 1.4037757216926175e-01, + "cpu_time": 1.4037138931297666e-01, + "time_unit": "ms" + } + ] +} diff --git a/test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json new file mode 100644 index 00000000..34b97b05 --- /dev/null +++ 
b/test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json @@ -0,0 +1,348 @@ +{ + "context": { + "date": "2025-05-26T22:16:26+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./bin/image-processing-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [3.6958,13.6172,10.3042], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "Eigen_Convolve2D/1", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Eigen_Convolve2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 32, + "real_time": 2.2072315390687436e+01, + "cpu_time": 2.2071622781250003e+01, + "time_unit": "ms" + }, + { + "name": "MLIR_Conv2D/1", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "MLIR_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 10, + "real_time": 6.8203160911798477e+01, + "cpu_time": 6.8199147800000006e+01, + "time_unit": "ms" + }, + { + "name": "Buddy_Conv2D/1", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "Buddy_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 110, + "real_time": 6.3550283455035901e+00, + "cpu_time": 6.3548004909090885e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Corr2D_Constant_Padding/1", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "Buddy_Corr2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + 
"threads": 1, + "iterations": 149, + "real_time": 4.7013974254763369e+00, + "cpu_time": 4.7011444362416119e+00, + "time_unit": "ms" + }, + { + "name": "OpenCV_Filter2D_Constant_Padding/1", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "OpenCV_Filter2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 80, + "real_time": 8.8078494882211089e+00, + "cpu_time": 8.8076434625000033e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 5, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4781, + "real_time": 1.4572581434082821e-01, + "cpu_time": 1.4571423677055004e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2627, + "real_time": 2.6675003648462248e-01, + "cpu_time": 2.6674071488389783e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 7, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 102862, + "real_time": 6.8005258963692670e-03, + "cpu_time": 6.8001470805545260e-03, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "family_index": 8, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 48877, + "real_time": 
1.4313261689290913e-02, + "cpu_time": 1.4312918346052323e-02, + "time_unit": "ms" + }, + { + "name": "Buddy_Erosion2D_Constant_Padding/1", + "family_index": 9, + "per_family_instance_index": 0, + "run_name": "Buddy_Erosion2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3093, + "real_time": 2.2363355470597840e-01, + "cpu_time": 2.2361846039443906e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Dilation2D_Constant_Padding/1", + "family_index": 10, + "per_family_instance_index": 0, + "run_name": "Buddy_Dilation2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3107, + "real_time": 2.2298299821676992e-01, + "cpu_time": 2.2297160830383037e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Opening2D_Constant_Padding/1", + "family_index": 11, + "per_family_instance_index": 0, + "run_name": "Buddy_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2034, + "real_time": 3.4434181585863982e-01, + "cpu_time": 3.4430977974434573e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Closing2D_Constant_Padding/1", + "family_index": 12, + "per_family_instance_index": 0, + "run_name": "Buddy_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1989, + "real_time": 3.4222938963191002e-01, + "cpu_time": 3.4221931523378590e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_TopHat2D_Constant_Padding/1", + "family_index": 13, + "per_family_instance_index": 0, + "run_name": "Buddy_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 754, + "real_time": 8.9190859070902473e-01, + "cpu_time": 8.9187836339522575e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_BottomHat2D_Constant_Padding/1", 
+ "family_index": 14, + "per_family_instance_index": 0, + "run_name": "Buddy_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 736, + "real_time": 9.1232408000075293e-01, + "cpu_time": 9.1226822146739139e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Erode2D_Constant_Padding/1", + "family_index": 15, + "per_family_instance_index": 0, + "run_name": "OpenCV_Erode2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 5028, + "real_time": 1.3946793683009576e-01, + "cpu_time": 1.3945602704852819e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Opening2D_Constant_Padding/1", + "family_index": 16, + "per_family_instance_index": 0, + "run_name": "OpenCV_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3031, + "real_time": 2.2504380306506150e-01, + "cpu_time": 2.2503673309138941e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Closing2D_Constant_Padding/1", + "family_index": 17, + "per_family_instance_index": 0, + "run_name": "OpenCV_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3131, + "real_time": 2.2496341949870738e-01, + "cpu_time": 2.2495109709358108e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_TopHat2D_Constant_Padding/1", + "family_index": 18, + "per_family_instance_index": 0, + "run_name": "OpenCV_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2697, + "real_time": 2.5882281868555918e-01, + "cpu_time": 2.5881240934371474e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_BottomHat2D_Constant_Padding/1", + "family_index": 19, + "per_family_instance_index": 0, + "run_name": "OpenCV_BottomHat2D_Constant_Padding/1", + "run_type": 
"iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2706, + "real_time": 2.5917346474314651e-01, + "cpu_time": 2.5916015003695453e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "family_index": 20, + "per_family_instance_index": 0, + "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2674, + "real_time": 2.6137103022214808e-01, + "cpu_time": 2.6136243231114431e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Dilate2D_Constant_Padding/1", + "family_index": 21, + "per_family_instance_index": 0, + "run_name": "OpenCV_Dilate2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4993, + "real_time": 1.5533545225912407e-01, + "cpu_time": 1.5532848247546552e-01, + "time_unit": "ms" + } + ] +} diff --git a/test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json new file mode 100644 index 00000000..dec55d36 --- /dev/null +++ b/test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json @@ -0,0 +1,348 @@ +{ + "context": { + "date": "2025-05-26T22:16:50+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./bin/image-processing-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [3.11621,12.6836,10.0825], + "library_build_type": "release" + }, 
+ "benchmarks": [ + { + "name": "Eigen_Convolve2D/1", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Eigen_Convolve2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 20, + "real_time": 3.5189422406256199e+01, + "cpu_time": 3.5187518800000007e+01, + "time_unit": "ms" + }, + { + "name": "MLIR_Conv2D/1", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "MLIR_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 6, + "real_time": 1.2122860985497634e+02, + "cpu_time": 1.2122526516666669e+02, + "time_unit": "ms" + }, + { + "name": "Buddy_Conv2D/1", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "Buddy_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 64, + "real_time": 1.0844544944120571e+01, + "cpu_time": 1.0844293578125004e+01, + "time_unit": "ms" + }, + { + "name": "Buddy_Corr2D_Constant_Padding/1", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "Buddy_Corr2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 89, + "real_time": 7.9097353298677486e+00, + "cpu_time": 7.9092355393258478e+00, + "time_unit": "ms" + }, + { + "name": "OpenCV_Filter2D_Constant_Padding/1", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "OpenCV_Filter2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 118, + "real_time": 5.9020572452474447e+00, + "cpu_time": 5.9018793644067769e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 5, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 
0, + "threads": 1, + "iterations": 4772, + "real_time": 1.4571150201189348e-01, + "cpu_time": 1.4570317455993292e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2626, + "real_time": 2.6634765004417138e-01, + "cpu_time": 2.6633460662604719e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 7, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 102812, + "real_time": 6.8068346751817806e-03, + "cpu_time": 6.8065919639730731e-03, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "family_index": 8, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 48301, + "real_time": 1.4475951286809114e-02, + "cpu_time": 1.4475346970042036e-02, + "time_unit": "ms" + }, + { + "name": "Buddy_Erosion2D_Constant_Padding/1", + "family_index": 9, + "per_family_instance_index": 0, + "run_name": "Buddy_Erosion2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3183, + "real_time": 2.1950167024749800e-01, + "cpu_time": 2.1948676625824695e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Dilation2D_Constant_Padding/1", + "family_index": 10, + "per_family_instance_index": 0, + "run_name": "Buddy_Dilation2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3163, + "real_time": 2.1986513941245783e-01, + 
"cpu_time": 2.1985423174201710e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Opening2D_Constant_Padding/1", + "family_index": 11, + "per_family_instance_index": 0, + "run_name": "Buddy_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2017, + "real_time": 3.3586876661723120e-01, + "cpu_time": 3.3583024392662403e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Closing2D_Constant_Padding/1", + "family_index": 12, + "per_family_instance_index": 0, + "run_name": "Buddy_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2024, + "real_time": 3.5512142928810458e-01, + "cpu_time": 3.5510094416996085e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_TopHat2D_Constant_Padding/1", + "family_index": 13, + "per_family_instance_index": 0, + "run_name": "Buddy_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 749, + "real_time": 9.0315306835244591e-01, + "cpu_time": 9.0306743658211086e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_BottomHat2D_Constant_Padding/1", + "family_index": 14, + "per_family_instance_index": 0, + "run_name": "Buddy_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 730, + "real_time": 9.0755133392059639e-01, + "cpu_time": 9.0750758493150741e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Erode2D_Constant_Padding/1", + "family_index": 15, + "per_family_instance_index": 0, + "run_name": "OpenCV_Erode2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 5038, + "real_time": 1.3894656190220539e-01, + "cpu_time": 1.3893922211194934e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Opening2D_Constant_Padding/1", + "family_index": 16, + 
"per_family_instance_index": 0, + "run_name": "OpenCV_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3212, + "real_time": 2.2129651453638285e-01, + "cpu_time": 2.2129014912826861e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Closing2D_Constant_Padding/1", + "family_index": 17, + "per_family_instance_index": 0, + "run_name": "OpenCV_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3171, + "real_time": 2.2047466810796210e-01, + "cpu_time": 2.2046259854935385e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_TopHat2D_Constant_Padding/1", + "family_index": 18, + "per_family_instance_index": 0, + "run_name": "OpenCV_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2701, + "real_time": 2.5345145365080540e-01, + "cpu_time": 2.5344063087745250e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_BottomHat2D_Constant_Padding/1", + "family_index": 19, + "per_family_instance_index": 0, + "run_name": "OpenCV_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2755, + "real_time": 2.5492152249856348e-01, + "cpu_time": 2.5491474192377500e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "family_index": 20, + "per_family_instance_index": 0, + "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2727, + "real_time": 2.5723294054779938e-01, + "cpu_time": 2.5722383608360888e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Dilate2D_Constant_Padding/1", + "family_index": 21, + "per_family_instance_index": 0, + "run_name": "OpenCV_Dilate2D_Constant_Padding/1", + "run_type": "iteration", + 
"repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 5053, + "real_time": 1.3811498115127827e-01, + "cpu_time": 1.3810704155946990e-01, + "time_unit": "ms" + } + ] +} diff --git a/test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json new file mode 100644 index 00000000..c85f8aec --- /dev/null +++ b/test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json @@ -0,0 +1,348 @@ +{ + "context": { + "date": "2025-05-26T22:17:14+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./bin/image-processing-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [2.73438,11.8242,9.8667], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "Eigen_Convolve2D/1", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Eigen_Convolve2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 20, + "real_time": 3.5257031954824924e+01, + "cpu_time": 3.5254986799999998e+01, + "time_unit": "ms" + }, + { + "name": "MLIR_Conv2D/1", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "MLIR_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 6, + "real_time": 1.2104108215620120e+02, + "cpu_time": 1.2103646300000001e+02, + "time_unit": "ms" + }, + { + "name": "Buddy_Conv2D/1", + "family_index": 2, + 
"per_family_instance_index": 0, + "run_name": "Buddy_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 64, + "real_time": 1.0882732516620308e+01, + "cpu_time": 1.0882239125000005e+01, + "time_unit": "ms" + }, + { + "name": "Buddy_Corr2D_Constant_Padding/1", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "Buddy_Corr2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 88, + "real_time": 7.9285275106402961e+00, + "cpu_time": 7.9281318181818197e+00, + "time_unit": "ms" + }, + { + "name": "OpenCV_Filter2D_Constant_Padding/1", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "OpenCV_Filter2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 118, + "real_time": 5.9106075757388341e+00, + "cpu_time": 5.9102044830508484e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 5, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4703, + "real_time": 1.4615413843127936e-01, + "cpu_time": 1.4614629045290239e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2264, + "real_time": 2.7192003906510320e-01, + "cpu_time": 2.7190400795052999e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 7, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": 
"iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 103210, + "real_time": 6.7838188378100085e-03, + "cpu_time": 6.7834774924910354e-03, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "family_index": 8, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 48545, + "real_time": 1.4473049482143629e-02, + "cpu_time": 1.4472298918529192e-02, + "time_unit": "ms" + }, + { + "name": "Buddy_Erosion2D_Constant_Padding/1", + "family_index": 9, + "per_family_instance_index": 0, + "run_name": "Buddy_Erosion2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3132, + "real_time": 2.2119966527092655e-01, + "cpu_time": 2.2118326660280993e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Dilation2D_Constant_Padding/1", + "family_index": 10, + "per_family_instance_index": 0, + "run_name": "Buddy_Dilation2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3160, + "real_time": 2.2111865279229381e-01, + "cpu_time": 2.2110744240506322e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Opening2D_Constant_Padding/1", + "family_index": 11, + "per_family_instance_index": 0, + "run_name": "Buddy_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2027, + "real_time": 3.3099344161030808e-01, + "cpu_time": 3.3097353527380324e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Closing2D_Constant_Padding/1", + "family_index": 12, + "per_family_instance_index": 0, + "run_name": "Buddy_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2017, + "real_time": 
3.5027682275156247e-01, + "cpu_time": 3.5026231036192423e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_TopHat2D_Constant_Padding/1", + "family_index": 13, + "per_family_instance_index": 0, + "run_name": "Buddy_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 750, + "real_time": 9.0147645026445389e-01, + "cpu_time": 9.0144423999999890e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_BottomHat2D_Constant_Padding/1", + "family_index": 14, + "per_family_instance_index": 0, + "run_name": "Buddy_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 761, + "real_time": 8.7758632188322672e-01, + "cpu_time": 8.7753931011826602e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Erode2D_Constant_Padding/1", + "family_index": 15, + "per_family_instance_index": 0, + "run_name": "OpenCV_Erode2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 5027, + "real_time": 1.3902871209037976e-01, + "cpu_time": 1.3902262721304959e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Opening2D_Constant_Padding/1", + "family_index": 16, + "per_family_instance_index": 0, + "run_name": "OpenCV_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3038, + "real_time": 2.3011283285626460e-01, + "cpu_time": 2.3010441968400233e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Closing2D_Constant_Padding/1", + "family_index": 17, + "per_family_instance_index": 0, + "run_name": "OpenCV_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3072, + "real_time": 2.2764660631461689e-01, + "cpu_time": 2.2763665071614628e-01, + "time_unit": "ms" + }, + { + "name": 
"OpenCV_TopHat2D_Constant_Padding/1", + "family_index": 18, + "per_family_instance_index": 0, + "run_name": "OpenCV_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2693, + "real_time": 2.6117778368324673e-01, + "cpu_time": 2.6115971258819154e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_BottomHat2D_Constant_Padding/1", + "family_index": 19, + "per_family_instance_index": 0, + "run_name": "OpenCV_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2663, + "real_time": 2.6503028407541829e-01, + "cpu_time": 2.6501603755163383e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "family_index": 20, + "per_family_instance_index": 0, + "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2707, + "real_time": 2.5905529948830119e-01, + "cpu_time": 2.5904600775766579e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Dilate2D_Constant_Padding/1", + "family_index": 21, + "per_family_instance_index": 0, + "run_name": "OpenCV_Dilate2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 5003, + "real_time": 1.3957380974457165e-01, + "cpu_time": 1.3957011373176126e-01, + "time_unit": "ms" + } + ] +} diff --git a/test_result/imageprocessing/image-processing-result.log b/test_result/imageprocessing/image-processing-result.log index 7d6038f0..b7564e13 100644 --- a/test_result/imageprocessing/image-processing-result.log +++ b/test_result/imageprocessing/image-processing-result.log @@ -1,9 +1,9 @@ -Benchmark results - Mon May 26 22:07:47 UTC 2025 +Benchmark results - Mon May 26 22:13:36 UTC 2025 Testing SSE support SSE is supported. 
Running image-processing-benchmark for SSE Running: ../benchmarks/ImageProcessing/Images/YuTu.png prewittKernelAlign random3x3KernelAlignInt CONSTANT_PADDING -2025-05-26T22:07:48+00:00 +2025-05-26T22:13:36+00:00 Running ./bin/image-processing-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -11,31 +11,991 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 2.09, 1.49, 3.51 +Load Average: 30.76, 22.52, 11.98 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. -------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------------------- -Eigen_Convolve2D/1 5.00 ms 5.00 ms 140 +Eigen_Convolve2D/1 4.98 ms 4.98 ms 140 +MLIR_Conv2D/1 7.36 ms 7.36 ms 95 +Buddy_Conv2D/1 0.438 ms 0.438 ms 1585 +Buddy_Corr2D_Constant_Padding/1 0.796 ms 0.796 ms 879 +OpenCV_Filter2D_Constant_Padding/1 1.28 ms 1.28 ms 549 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.149 ms 0.149 ms 4708 +Buddy_Resize2D_Bilinear_Interpolation/1 0.267 ms 0.267 ms 2618 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 102964 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 48907 +Buddy_Erosion2D_Constant_Padding/1 0.233 ms 0.233 ms 3019 +Buddy_Dilation2D_Constant_Padding/1 0.231 ms 0.231 ms 3004 +Buddy_Opening2D_Constant_Padding/1 0.335 ms 0.335 ms 2064 +Buddy_Closing2D_Constant_Padding/1 0.339 ms 0.339 ms 2044 +Buddy_TopHat2D_Constant_Padding/1 0.856 ms 0.856 ms 782 +Buddy_BottomHat2D_Constant_Padding/1 0.851 ms 0.851 ms 791 +OpenCV_Erode2D_Constant_Padding/1 0.140 ms 0.140 ms 5012 +OpenCV_Opening2D_Constant_Padding/1 0.215 ms 0.215 ms 3259 +OpenCV_Closing2D_Constant_Padding/1 0.215 ms 0.215 ms 3254 +OpenCV_TopHat2D_Constant_Padding/1 0.249 ms 0.249 ms 2816 +OpenCV_BottomHat2D_Constant_Padding/1 0.249 ms 0.249 ms 
2807 +OpenCV_MorphGrad2D_Constant_Padding/1 0.268 ms 0.268 ms 2604 +OpenCV_Dilate2D_Constant_Padding/1 0.141 ms 0.141 ms 4985 +Running: ../benchmarks/ImageProcessing/Images/YuTu.png prewittKernelAlign random3x3KernelAlignInt REPLICATE_PADDING +2025-05-26T22:14:00+00:00 +Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 20.95, 20.87, 11.71 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 4.86 ms 4.86 ms 144 +MLIR_Conv2D/1 7.36 ms 7.36 ms 95 +Buddy_Conv2D/1 0.444 ms 0.444 ms 1587 +Buddy_Corr2D_Constant_Padding/1 0.796 ms 0.796 ms 850 +OpenCV_Filter2D_Constant_Padding/1 1.28 ms 1.28 ms 549 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.146 ms 0.146 ms 4759 +Buddy_Resize2D_Bilinear_Interpolation/1 0.267 ms 0.267 ms 2628 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 102952 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 48945 +Buddy_Erosion2D_Constant_Padding/1 0.223 ms 0.223 ms 3245 +Buddy_Dilation2D_Constant_Padding/1 0.231 ms 0.231 ms 2941 +Buddy_Opening2D_Constant_Padding/1 0.364 ms 0.364 ms 1920 +Buddy_Closing2D_Constant_Padding/1 0.363 ms 0.363 ms 1942 +Buddy_TopHat2D_Constant_Padding/1 0.953 ms 0.953 ms 686 +Buddy_BottomHat2D_Constant_Padding/1 1.05 ms 1.05 ms 687 +OpenCV_Erode2D_Constant_Padding/1 0.140 ms 0.140 ms 4813 +OpenCV_Opening2D_Constant_Padding/1 0.221 ms 0.221 ms 3185 +OpenCV_Closing2D_Constant_Padding/1 0.221 ms 0.221 ms 3185 +OpenCV_TopHat2D_Constant_Padding/1 0.254 ms 0.254 ms 2747 +OpenCV_BottomHat2D_Constant_Padding/1 0.254 ms 0.254 ms 2760 
+OpenCV_MorphGrad2D_Constant_Padding/1 0.255 ms 0.255 ms 2735 +OpenCV_Dilate2D_Constant_Padding/1 0.142 ms 0.142 ms 4913 +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel3x3KernelAlign random3x3KernelAlignInt CONSTANT_PADDING +2025-05-26T22:14:25+00:00 +Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 14.48, 19.35, 11.45 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 4.87 ms 4.87 ms 143 +MLIR_Conv2D/1 7.38 ms 7.38 ms 95 +Buddy_Conv2D/1 0.526 ms 0.526 ms 1300 +Buddy_Corr2D_Constant_Padding/1 0.818 ms 0.818 ms 860 +OpenCV_Filter2D_Constant_Padding/1 1.28 ms 1.28 ms 548 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.146 ms 0.146 ms 4746 +Buddy_Resize2D_Bilinear_Interpolation/1 0.266 ms 0.266 ms 2630 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 102577 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 48983 +Buddy_Erosion2D_Constant_Padding/1 0.221 ms 0.221 ms 3160 +Buddy_Dilation2D_Constant_Padding/1 0.221 ms 0.221 ms 3155 +Buddy_Opening2D_Constant_Padding/1 0.409 ms 0.409 ms 2114 +Buddy_Closing2D_Constant_Padding/1 0.400 ms 0.400 ms 1704 +Buddy_TopHat2D_Constant_Padding/1 0.980 ms 0.980 ms 677 +Buddy_BottomHat2D_Constant_Padding/1 0.972 ms 0.972 ms 684 +OpenCV_Erode2D_Constant_Padding/1 0.138 ms 0.138 ms 5055 +OpenCV_Opening2D_Constant_Padding/1 0.213 ms 0.213 ms 3286 +OpenCV_Closing2D_Constant_Padding/1 0.216 ms 0.216 ms 3307 +OpenCV_TopHat2D_Constant_Padding/1 0.243 ms 0.243 ms 2840 +OpenCV_BottomHat2D_Constant_Padding/1 0.243 ms 0.243 ms 2838 
+OpenCV_MorphGrad2D_Constant_Padding/1 0.255 ms 0.255 ms 2741 +OpenCV_Dilate2D_Constant_Padding/1 0.139 ms 0.139 ms 5019 +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel3x3KernelAlign random3x3KernelAlignInt REPLICATE_PADDING +2025-05-26T22:14:49+00:00 +Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 10.30, 17.97, 11.20 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 4.87 ms 4.87 ms 143 +MLIR_Conv2D/1 7.38 ms 7.38 ms 95 +Buddy_Conv2D/1 0.527 ms 0.527 ms 1375 +Buddy_Corr2D_Constant_Padding/1 0.816 ms 0.816 ms 857 +OpenCV_Filter2D_Constant_Padding/1 1.28 ms 1.28 ms 548 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.146 ms 0.146 ms 4712 +Buddy_Resize2D_Bilinear_Interpolation/1 0.267 ms 0.267 ms 2626 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 103057 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 48615 +Buddy_Erosion2D_Constant_Padding/1 0.221 ms 0.221 ms 3174 +Buddy_Dilation2D_Constant_Padding/1 0.220 ms 0.220 ms 3168 +Buddy_Opening2D_Constant_Padding/1 0.329 ms 0.329 ms 2081 +Buddy_Closing2D_Constant_Padding/1 0.328 ms 0.328 ms 2123 +Buddy_TopHat2D_Constant_Padding/1 0.885 ms 0.885 ms 787 +Buddy_BottomHat2D_Constant_Padding/1 0.864 ms 0.864 ms 762 +OpenCV_Erode2D_Constant_Padding/1 0.139 ms 0.139 ms 5037 +OpenCV_Opening2D_Constant_Padding/1 0.223 ms 0.223 ms 3118 +OpenCV_Closing2D_Constant_Padding/1 0.223 ms 0.223 ms 3165 +OpenCV_TopHat2D_Constant_Padding/1 0.255 ms 0.255 ms 2680 +OpenCV_BottomHat2D_Constant_Padding/1 0.258 ms 0.258 ms 2756 
+OpenCV_MorphGrad2D_Constant_Padding/1 0.282 ms 0.282 ms 2733 +OpenCV_Dilate2D_Constant_Padding/1 0.140 ms 0.140 ms 4846 +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel5x5KernelAlign random3x3KernelAlignInt CONSTANT_PADDING +2025-05-26T22:15:13+00:00 +Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 7.94, 16.94, 11.01 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 11.8 ms 11.8 ms 59 +MLIR_Conv2D/1 29.9 ms 29.9 ms 24 +Buddy_Conv2D/1 3.12 ms 3.12 ms 225 +Buddy_Corr2D_Constant_Padding/1 2.36 ms 2.36 ms 299 +OpenCV_Filter2D_Constant_Padding/1 4.20 ms 4.20 ms 167 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.146 ms 0.146 ms 4718 +Buddy_Resize2D_Bilinear_Interpolation/1 0.267 ms 0.267 ms 2624 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 102278 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47123 +Buddy_Erosion2D_Constant_Padding/1 0.224 ms 0.224 ms 3147 +Buddy_Dilation2D_Constant_Padding/1 0.221 ms 0.221 ms 3121 +Buddy_Opening2D_Constant_Padding/1 0.345 ms 0.345 ms 2029 +Buddy_Closing2D_Constant_Padding/1 0.340 ms 0.340 ms 2088 +Buddy_TopHat2D_Constant_Padding/1 0.902 ms 0.902 ms 755 +Buddy_BottomHat2D_Constant_Padding/1 0.903 ms 0.903 ms 740 +OpenCV_Erode2D_Constant_Padding/1 0.138 ms 0.138 ms 5077 +OpenCV_Opening2D_Constant_Padding/1 0.230 ms 0.230 ms 3073 +OpenCV_Closing2D_Constant_Padding/1 0.227 ms 0.227 ms 3064 +OpenCV_TopHat2D_Constant_Padding/1 0.260 ms 0.260 ms 2696 +OpenCV_BottomHat2D_Constant_Padding/1 0.260 ms 0.260 ms 2699 
+OpenCV_MorphGrad2D_Constant_Padding/1 0.260 ms 0.260 ms 2689 +OpenCV_Dilate2D_Constant_Padding/1 0.140 ms 0.140 ms 5007 +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel5x5KernelAlign random3x3KernelAlignInt REPLICATE_PADDING +2025-05-26T22:15:37+00:00 +Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 5.91, 15.74, 10.77 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 11.8 ms 11.8 ms 59 +MLIR_Conv2D/1 29.8 ms 29.8 ms 23 +Buddy_Conv2D/1 3.11 ms 3.11 ms 225 +Buddy_Corr2D_Constant_Padding/1 2.36 ms 2.36 ms 297 +OpenCV_Filter2D_Constant_Padding/1 4.20 ms 4.20 ms 165 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.146 ms 0.146 ms 4749 +Buddy_Resize2D_Bilinear_Interpolation/1 0.266 ms 0.266 ms 2628 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 102987 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 48924 +Buddy_Erosion2D_Constant_Padding/1 0.222 ms 0.222 ms 3176 +Buddy_Dilation2D_Constant_Padding/1 0.222 ms 0.222 ms 3152 +Buddy_Opening2D_Constant_Padding/1 0.359 ms 0.359 ms 1998 +Buddy_Closing2D_Constant_Padding/1 0.336 ms 0.336 ms 1943 +Buddy_TopHat2D_Constant_Padding/1 0.909 ms 0.909 ms 750 +Buddy_BottomHat2D_Constant_Padding/1 0.912 ms 0.912 ms 737 +OpenCV_Erode2D_Constant_Padding/1 0.139 ms 0.139 ms 5030 +OpenCV_Opening2D_Constant_Padding/1 0.230 ms 0.230 ms 3017 +OpenCV_Closing2D_Constant_Padding/1 0.223 ms 0.223 ms 3170 +OpenCV_TopHat2D_Constant_Padding/1 0.267 ms 0.267 ms 2657 +OpenCV_BottomHat2D_Constant_Padding/1 0.262 ms 0.262 ms 2690 
+OpenCV_MorphGrad2D_Constant_Padding/1 0.262 ms 0.262 ms 2671 +OpenCV_Dilate2D_Constant_Padding/1 0.143 ms 0.143 ms 4878 +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel7x7KernelAlign random3x3KernelAlignInt CONSTANT_PADDING +2025-05-26T22:16:02+00:00 +Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 4.58, 14.63, 10.53 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 22.0 ms 22.0 ms 32 +MLIR_Conv2D/1 68.1 ms 68.1 ms 10 +Buddy_Conv2D/1 6.34 ms 6.34 ms 109 +Buddy_Corr2D_Constant_Padding/1 4.70 ms 4.70 ms 148 +OpenCV_Filter2D_Constant_Padding/1 8.80 ms 8.80 ms 79 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.146 ms 0.146 ms 4771 +Buddy_Resize2D_Bilinear_Interpolation/1 0.266 ms 0.266 ms 2626 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 103087 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 48675 +Buddy_Erosion2D_Constant_Padding/1 0.220 ms 0.220 ms 3188 +Buddy_Dilation2D_Constant_Padding/1 0.221 ms 0.221 ms 3182 +Buddy_Opening2D_Constant_Padding/1 0.334 ms 0.334 ms 2146 +Buddy_Closing2D_Constant_Padding/1 0.394 ms 0.394 ms 2142 +Buddy_TopHat2D_Constant_Padding/1 0.901 ms 0.901 ms 745 +Buddy_BottomHat2D_Constant_Padding/1 0.918 ms 0.918 ms 722 +OpenCV_Erode2D_Constant_Padding/1 0.138 ms 0.138 ms 5046 +OpenCV_Opening2D_Constant_Padding/1 0.224 ms 0.224 ms 3129 +OpenCV_Closing2D_Constant_Padding/1 0.227 ms 0.227 ms 3078 +OpenCV_TopHat2D_Constant_Padding/1 0.263 ms 0.263 ms 2660 +OpenCV_BottomHat2D_Constant_Padding/1 0.259 ms 0.259 ms 2723 
+OpenCV_MorphGrad2D_Constant_Padding/1 0.260 ms 0.260 ms 2685 +OpenCV_Dilate2D_Constant_Padding/1 0.140 ms 0.140 ms 4978 +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel7x7KernelAlign random3x3KernelAlignInt REPLICATE_PADDING +2025-05-26T22:16:26+00:00 +Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 3.70, 13.62, 10.30 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 22.1 ms 22.1 ms 32 +MLIR_Conv2D/1 68.2 ms 68.2 ms 10 +Buddy_Conv2D/1 6.36 ms 6.35 ms 110 +Buddy_Corr2D_Constant_Padding/1 4.70 ms 4.70 ms 149 +OpenCV_Filter2D_Constant_Padding/1 8.81 ms 8.81 ms 80 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.146 ms 0.146 ms 4781 +Buddy_Resize2D_Bilinear_Interpolation/1 0.267 ms 0.267 ms 2627 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 102862 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 48877 +Buddy_Erosion2D_Constant_Padding/1 0.224 ms 0.224 ms 3093 +Buddy_Dilation2D_Constant_Padding/1 0.223 ms 0.223 ms 3107 +Buddy_Opening2D_Constant_Padding/1 0.344 ms 0.344 ms 2034 +Buddy_Closing2D_Constant_Padding/1 0.342 ms 0.342 ms 1989 +Buddy_TopHat2D_Constant_Padding/1 0.892 ms 0.892 ms 754 +Buddy_BottomHat2D_Constant_Padding/1 0.912 ms 0.912 ms 736 +OpenCV_Erode2D_Constant_Padding/1 0.139 ms 0.139 ms 5028 +OpenCV_Opening2D_Constant_Padding/1 0.225 ms 0.225 ms 3031 +OpenCV_Closing2D_Constant_Padding/1 0.225 ms 0.225 ms 3131 +OpenCV_TopHat2D_Constant_Padding/1 0.259 ms 0.259 ms 2697 +OpenCV_BottomHat2D_Constant_Padding/1 0.259 ms 0.259 ms 2706 
+OpenCV_MorphGrad2D_Constant_Padding/1 0.261 ms 0.261 ms 2674 +OpenCV_Dilate2D_Constant_Padding/1 0.155 ms 0.155 ms 4993 +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel9x9KernelAlign random3x3KernelAlignInt CONSTANT_PADDING +2025-05-26T22:16:50+00:00 +Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 3.12, 12.68, 10.08 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 35.2 ms 35.2 ms 20 +MLIR_Conv2D/1 121 ms 121 ms 6 +Buddy_Conv2D/1 10.8 ms 10.8 ms 64 +Buddy_Corr2D_Constant_Padding/1 7.91 ms 7.91 ms 89 +OpenCV_Filter2D_Constant_Padding/1 5.90 ms 5.90 ms 118 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.146 ms 0.146 ms 4772 +Buddy_Resize2D_Bilinear_Interpolation/1 0.266 ms 0.266 ms 2626 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 102812 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 48301 +Buddy_Erosion2D_Constant_Padding/1 0.220 ms 0.219 ms 3183 +Buddy_Dilation2D_Constant_Padding/1 0.220 ms 0.220 ms 3163 +Buddy_Opening2D_Constant_Padding/1 0.336 ms 0.336 ms 2017 +Buddy_Closing2D_Constant_Padding/1 0.355 ms 0.355 ms 2024 +Buddy_TopHat2D_Constant_Padding/1 0.903 ms 0.903 ms 749 +Buddy_BottomHat2D_Constant_Padding/1 0.908 ms 0.908 ms 730 +OpenCV_Erode2D_Constant_Padding/1 0.139 ms 0.139 ms 5038 +OpenCV_Opening2D_Constant_Padding/1 0.221 ms 0.221 ms 3212 +OpenCV_Closing2D_Constant_Padding/1 0.220 ms 0.220 ms 3171 +OpenCV_TopHat2D_Constant_Padding/1 0.253 ms 0.253 ms 2701 +OpenCV_BottomHat2D_Constant_Padding/1 0.255 ms 0.255 ms 2755 
+OpenCV_MorphGrad2D_Constant_Padding/1 0.257 ms 0.257 ms 2727 +OpenCV_Dilate2D_Constant_Padding/1 0.138 ms 0.138 ms 5053 +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel9x9KernelAlign random3x3KernelAlignInt REPLICATE_PADDING +2025-05-26T22:17:14+00:00 +Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 2.73, 11.82, 9.87 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 35.3 ms 35.3 ms 20 +MLIR_Conv2D/1 121 ms 121 ms 6 +Buddy_Conv2D/1 10.9 ms 10.9 ms 64 +Buddy_Corr2D_Constant_Padding/1 7.93 ms 7.93 ms 88 +OpenCV_Filter2D_Constant_Padding/1 5.91 ms 5.91 ms 118 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.146 ms 0.146 ms 4703 +Buddy_Resize2D_Bilinear_Interpolation/1 0.272 ms 0.272 ms 2264 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 103210 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 48545 +Buddy_Erosion2D_Constant_Padding/1 0.221 ms 0.221 ms 3132 +Buddy_Dilation2D_Constant_Padding/1 0.221 ms 0.221 ms 3160 +Buddy_Opening2D_Constant_Padding/1 0.331 ms 0.331 ms 2027 +Buddy_Closing2D_Constant_Padding/1 0.350 ms 0.350 ms 2017 +Buddy_TopHat2D_Constant_Padding/1 0.901 ms 0.901 ms 750 +Buddy_BottomHat2D_Constant_Padding/1 0.878 ms 0.878 ms 761 +OpenCV_Erode2D_Constant_Padding/1 0.139 ms 0.139 ms 5027 +OpenCV_Opening2D_Constant_Padding/1 0.230 ms 0.230 ms 3038 +OpenCV_Closing2D_Constant_Padding/1 0.228 ms 0.228 ms 3072 +OpenCV_TopHat2D_Constant_Padding/1 0.261 ms 0.261 ms 2693 +OpenCV_BottomHat2D_Constant_Padding/1 0.265 ms 0.265 ms 2663 
+OpenCV_MorphGrad2D_Constant_Padding/1 0.259 ms 0.259 ms 2707 +OpenCV_Dilate2D_Constant_Padding/1 0.140 ms 0.140 ms 5003 +Running: ../benchmarks/ImageProcessing/Images/YuTu.png laplacianKernelAlign random3x3KernelAlignInt CONSTANT_PADDING +2025-05-26T22:17:38+00:00 +Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 2.48, 11.03, 9.66 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 4.99 ms 4.99 ms 139 +MLIR_Conv2D/1 7.38 ms 7.38 ms 94 +Buddy_Conv2D/1 0.711 ms 0.711 ms 1001 +Buddy_Corr2D_Constant_Padding/1 1.08 ms 1.08 ms 645 +OpenCV_Filter2D_Constant_Padding/1 1.90 ms 1.90 ms 367 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.147 ms 0.147 ms 4717 +Buddy_Resize2D_Bilinear_Interpolation/1 0.266 ms 0.266 ms 2633 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 103033 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 48825 +Buddy_Erosion2D_Constant_Padding/1 0.223 ms 0.223 ms 3139 +Buddy_Dilation2D_Constant_Padding/1 0.221 ms 0.221 ms 3065 +Buddy_Opening2D_Constant_Padding/1 0.360 ms 0.360 ms 1920 +Buddy_Closing2D_Constant_Padding/1 0.364 ms 0.364 ms 1938 +Buddy_TopHat2D_Constant_Padding/1 0.897 ms 0.896 ms 725 +Buddy_BottomHat2D_Constant_Padding/1 0.919 ms 0.919 ms 739 +OpenCV_Erode2D_Constant_Padding/1 0.154 ms 0.154 ms 4653 +OpenCV_Opening2D_Constant_Padding/1 0.223 ms 0.223 ms 3211 +OpenCV_Closing2D_Constant_Padding/1 0.221 ms 0.221 ms 3106 +OpenCV_TopHat2D_Constant_Padding/1 0.257 ms 0.257 ms 2694 +OpenCV_BottomHat2D_Constant_Padding/1 0.256 ms 0.256 ms 2735 
+OpenCV_MorphGrad2D_Constant_Padding/1 0.256 ms 0.256 ms 2736 +OpenCV_Dilate2D_Constant_Padding/1 0.140 ms 0.140 ms 4980 +Running: ../benchmarks/ImageProcessing/Images/YuTu.png laplacianKernelAlign random3x3KernelAlignInt REPLICATE_PADDING +2025-05-26T22:18:02+00:00 +Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 2.49, 10.48, 9.50 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 5.01 ms 5.01 ms 140 MLIR_Conv2D/1 7.39 ms 7.39 ms 95 -Buddy_Conv2D/1 0.510 ms 0.509 ms 1443 -Buddy_Corr2D_Constant_Padding/1 0.805 ms 0.805 ms 868 +Buddy_Conv2D/1 0.702 ms 0.702 ms 962 +Buddy_Corr2D_Constant_Padding/1 1.07 ms 1.07 ms 652 +OpenCV_Filter2D_Constant_Padding/1 1.91 ms 1.91 ms 366 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.146 ms 0.146 ms 4698 +Buddy_Resize2D_Bilinear_Interpolation/1 0.267 ms 0.267 ms 2627 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 102351 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 48814 +Buddy_Erosion2D_Constant_Padding/1 0.222 ms 0.222 ms 3146 +Buddy_Dilation2D_Constant_Padding/1 0.221 ms 0.221 ms 3150 +Buddy_Opening2D_Constant_Padding/1 0.330 ms 0.329 ms 2128 +Buddy_Closing2D_Constant_Padding/1 0.332 ms 0.332 ms 2167 +Buddy_TopHat2D_Constant_Padding/1 0.866 ms 0.866 ms 747 +Buddy_BottomHat2D_Constant_Padding/1 0.875 ms 0.875 ms 769 +OpenCV_Erode2D_Constant_Padding/1 0.139 ms 0.139 ms 5009 +OpenCV_Opening2D_Constant_Padding/1 0.216 ms 0.216 ms 3227 +OpenCV_Closing2D_Constant_Padding/1 0.225 ms 0.225 ms 3120 +OpenCV_TopHat2D_Constant_Padding/1 0.258 
ms 0.258 ms 2745 +OpenCV_BottomHat2D_Constant_Padding/1 0.255 ms 0.255 ms 2765 +OpenCV_MorphGrad2D_Constant_Padding/1 0.256 ms 0.256 ms 2733 +OpenCV_Dilate2D_Constant_Padding/1 0.141 ms 0.141 ms 4956 +Running: ../benchmarks/ImageProcessing/Images/YuTu.png logKernelAlign random3x3KernelAlignInt CONSTANT_PADDING +2025-05-26T22:18:25+00:00 +Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 2.26, 9.78, 9.30 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 11.9 ms 11.9 ms 60 +MLIR_Conv2D/1 29.8 ms 29.8 ms 23 +Buddy_Conv2D/1 2.09 ms 2.09 ms 332 +Buddy_Corr2D_Constant_Padding/1 1.80 ms 1.80 ms 390 +OpenCV_Filter2D_Constant_Padding/1 2.74 ms 2.74 ms 256 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.146 ms 0.146 ms 4768 +Buddy_Resize2D_Bilinear_Interpolation/1 0.266 ms 0.266 ms 2629 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 103262 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 48878 +Buddy_Erosion2D_Constant_Padding/1 0.224 ms 0.224 ms 3105 +Buddy_Dilation2D_Constant_Padding/1 0.223 ms 0.223 ms 3120 +Buddy_Opening2D_Constant_Padding/1 0.352 ms 0.352 ms 1986 +Buddy_Closing2D_Constant_Padding/1 0.348 ms 0.348 ms 1944 +Buddy_TopHat2D_Constant_Padding/1 0.914 ms 0.914 ms 747 +Buddy_BottomHat2D_Constant_Padding/1 0.899 ms 0.899 ms 739 +OpenCV_Erode2D_Constant_Padding/1 0.141 ms 0.141 ms 4963 +OpenCV_Opening2D_Constant_Padding/1 0.223 ms 0.223 ms 3142 +OpenCV_Closing2D_Constant_Padding/1 0.226 ms 0.226 ms 3089 +OpenCV_TopHat2D_Constant_Padding/1 0.261 ms 0.261 ms 2689 
+OpenCV_BottomHat2D_Constant_Padding/1 0.262 ms 0.262 ms 2693 +OpenCV_MorphGrad2D_Constant_Padding/1 0.263 ms 0.263 ms 2664 +OpenCV_Dilate2D_Constant_Padding/1 0.139 ms 0.139 ms 5045 +Running: ../benchmarks/ImageProcessing/Images/YuTu.png logKernelAlign random3x3KernelAlignInt REPLICATE_PADDING +2025-05-26T22:18:50+00:00 +Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 2.17, 9.16, 9.10 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 11.6 ms 11.6 ms 60 +MLIR_Conv2D/1 29.9 ms 29.9 ms 23 +Buddy_Conv2D/1 2.13 ms 2.13 ms 327 +Buddy_Corr2D_Constant_Padding/1 1.80 ms 1.80 ms 389 +OpenCV_Filter2D_Constant_Padding/1 2.74 ms 2.74 ms 255 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.146 ms 0.146 ms 4755 +Buddy_Resize2D_Bilinear_Interpolation/1 0.266 ms 0.266 ms 2633 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 102709 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 48854 +Buddy_Erosion2D_Constant_Padding/1 0.231 ms 0.231 ms 2456 +Buddy_Dilation2D_Constant_Padding/1 0.226 ms 0.226 ms 3097 +Buddy_Opening2D_Constant_Padding/1 0.352 ms 0.352 ms 2015 +Buddy_Closing2D_Constant_Padding/1 0.346 ms 0.346 ms 2002 +Buddy_TopHat2D_Constant_Padding/1 0.924 ms 0.924 ms 721 +Buddy_BottomHat2D_Constant_Padding/1 0.923 ms 0.923 ms 727 +OpenCV_Erode2D_Constant_Padding/1 0.140 ms 0.140 ms 4988 +OpenCV_Opening2D_Constant_Padding/1 0.222 ms 0.222 ms 3142 +OpenCV_Closing2D_Constant_Padding/1 0.221 ms 0.221 ms 3180 +OpenCV_TopHat2D_Constant_Padding/1 0.265 ms 0.265 ms 2711 
+OpenCV_BottomHat2D_Constant_Padding/1 0.261 ms 0.261 ms 2642 +OpenCV_MorphGrad2D_Constant_Padding/1 0.259 ms 0.259 ms 2712 +OpenCV_Dilate2D_Constant_Padding/1 0.138 ms 0.138 ms 5064 +Testing AVX2 support +AVX2 is supported. +Running image-processing-benchmark for AVX2 +Running: ../benchmarks/ImageProcessing/Images/YuTu.png prewittKernelAlign random3x3KernelAlignInt CONSTANT_PADDING +2025-05-26T22:19:15+00:00 +Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 2.11, 8.58, 8.91 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 4.87 ms 4.87 ms 144 +MLIR_Conv2D/1 7.37 ms 7.37 ms 95 +Buddy_Conv2D/1 0.315 ms 0.315 ms 2234 +Buddy_Corr2D_Constant_Padding/1 0.824 ms 0.824 ms 850 +OpenCV_Filter2D_Constant_Padding/1 1.28 ms 1.28 ms 546 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.146 ms 0.146 ms 4747 +Buddy_Resize2D_Bilinear_Interpolation/1 0.266 ms 0.266 ms 2629 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 102542 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 48907 +Buddy_Erosion2D_Constant_Padding/1 0.224 ms 0.224 ms 3132 +Buddy_Dilation2D_Constant_Padding/1 0.222 ms 0.222 ms 3121 +Buddy_Opening2D_Constant_Padding/1 0.338 ms 0.338 ms 2059 +Buddy_Closing2D_Constant_Padding/1 0.344 ms 0.344 ms 2107 +Buddy_TopHat2D_Constant_Padding/1 0.904 ms 0.904 ms 725 +Buddy_BottomHat2D_Constant_Padding/1 0.911 ms 0.911 ms 744 +OpenCV_Erode2D_Constant_Padding/1 0.138 ms 0.138 ms 5040 +OpenCV_Opening2D_Constant_Padding/1 0.242 ms 0.242 ms 3222 +OpenCV_Closing2D_Constant_Padding/1 0.222 ms 
0.222 ms 3151 +OpenCV_TopHat2D_Constant_Padding/1 0.255 ms 0.255 ms 2758 +OpenCV_BottomHat2D_Constant_Padding/1 0.260 ms 0.260 ms 2688 +OpenCV_MorphGrad2D_Constant_Padding/1 0.256 ms 0.256 ms 2740 +OpenCV_Dilate2D_Constant_Padding/1 0.140 ms 0.140 ms 4990 +Running: ../benchmarks/ImageProcessing/Images/YuTu.png prewittKernelAlign random3x3KernelAlignInt REPLICATE_PADDING +2025-05-26T22:19:39+00:00 +Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 2.07, 8.05, 8.73 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 5.01 ms 5.01 ms 140 +MLIR_Conv2D/1 7.36 ms 7.36 ms 95 +Buddy_Conv2D/1 0.313 ms 0.313 ms 2233 +Buddy_Corr2D_Constant_Padding/1 0.813 ms 0.813 ms 856 OpenCV_Filter2D_Constant_Padding/1 1.28 ms 1.28 ms 547 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.147 ms 0.147 ms 4810 -Buddy_Resize2D_Bilinear_Interpolation/1 0.270 ms 0.270 ms 2572 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 98744 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 46694 -Buddy_Erosion2D_Constant_Padding/1 0.225 ms 0.225 ms 3144 -Buddy_Dilation2D_Constant_Padding/1 0.224 ms 0.224 ms 3072 -Buddy_Opening2D_Constant_Padding/1 0.407 ms 0.407 ms 1655 -Buddy_Closing2D_Constant_Padding/1 0.379 ms 0.379 ms 1766 -Buddy_TopHat2D_Constant_Padding/1 0.893 ms 0.893 ms 721 -Buddy_BottomHat2D_Constant_Padding/1 0.930 ms 0.930 ms 724 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.146 ms 0.146 ms 4751 +Buddy_Resize2D_Bilinear_Interpolation/1 0.266 ms 0.266 ms 2631 
+OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 103135 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 48881 +Buddy_Erosion2D_Constant_Padding/1 0.220 ms 0.220 ms 3158 +Buddy_Dilation2D_Constant_Padding/1 0.225 ms 0.225 ms 3109 +Buddy_Opening2D_Constant_Padding/1 0.340 ms 0.340 ms 2007 +Buddy_Closing2D_Constant_Padding/1 0.341 ms 0.341 ms 2062 +Buddy_TopHat2D_Constant_Padding/1 0.904 ms 0.904 ms 732 +Buddy_BottomHat2D_Constant_Padding/1 0.907 ms 0.907 ms 731 +OpenCV_Erode2D_Constant_Padding/1 0.138 ms 0.138 ms 5071 +OpenCV_Opening2D_Constant_Padding/1 0.219 ms 0.219 ms 3192 +OpenCV_Closing2D_Constant_Padding/1 0.217 ms 0.217 ms 3221 +OpenCV_TopHat2D_Constant_Padding/1 0.252 ms 0.252 ms 2767 +OpenCV_BottomHat2D_Constant_Padding/1 0.254 ms 0.254 ms 2752 +OpenCV_MorphGrad2D_Constant_Padding/1 0.259 ms 0.259 ms 2708 +OpenCV_Dilate2D_Constant_Padding/1 0.143 ms 0.143 ms 4910 +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel3x3KernelAlign random3x3KernelAlignInt CONSTANT_PADDING +2025-05-26T22:20:03+00:00 +Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 2.05, 7.66, 8.58 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
+-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 5.00 ms 5.00 ms 139 +MLIR_Conv2D/1 7.38 ms 7.38 ms 95 +Buddy_Conv2D/1 0.313 ms 0.313 ms 2239 +Buddy_Corr2D_Constant_Padding/1 0.815 ms 0.815 ms 863 +OpenCV_Filter2D_Constant_Padding/1 1.28 ms 1.28 ms 548 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.146 ms 0.146 ms 4797 +Buddy_Resize2D_Bilinear_Interpolation/1 0.266 ms 0.266 ms 2637 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 103585 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 48856 +Buddy_Erosion2D_Constant_Padding/1 0.223 ms 0.223 ms 3141 +Buddy_Dilation2D_Constant_Padding/1 0.221 ms 0.221 ms 3162 +Buddy_Opening2D_Constant_Padding/1 0.336 ms 0.336 ms 2082 +Buddy_Closing2D_Constant_Padding/1 0.328 ms 0.328 ms 2093 +Buddy_TopHat2D_Constant_Padding/1 0.917 ms 0.917 ms 727 +Buddy_BottomHat2D_Constant_Padding/1 0.915 ms 0.915 ms 732 +OpenCV_Erode2D_Constant_Padding/1 0.140 ms 0.140 ms 5002 +OpenCV_Opening2D_Constant_Padding/1 0.225 ms 0.225 ms 3104 +OpenCV_Closing2D_Constant_Padding/1 0.223 ms 0.223 ms 3100 +OpenCV_TopHat2D_Constant_Padding/1 0.254 ms 0.254 ms 2764 +OpenCV_BottomHat2D_Constant_Padding/1 0.260 ms 0.260 ms 2700 +OpenCV_MorphGrad2D_Constant_Padding/1 0.256 ms 0.256 ms 2725 +OpenCV_Dilate2D_Constant_Padding/1 0.140 ms 0.140 ms 4989 +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel3x3KernelAlign random3x3KernelAlignInt REPLICATE_PADDING +2025-05-26T22:20:27+00:00 +Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 2.09, 7.22, 8.41 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
+-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 5.00 ms 5.00 ms 140 +MLIR_Conv2D/1 7.37 ms 7.37 ms 95 +Buddy_Conv2D/1 0.313 ms 0.313 ms 2227 +Buddy_Corr2D_Constant_Padding/1 0.815 ms 0.815 ms 857 +OpenCV_Filter2D_Constant_Padding/1 1.28 ms 1.28 ms 548 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.145 ms 0.145 ms 4786 +Buddy_Resize2D_Bilinear_Interpolation/1 0.267 ms 0.267 ms 2621 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 103139 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 48916 +Buddy_Erosion2D_Constant_Padding/1 0.228 ms 0.228 ms 3055 +Buddy_Dilation2D_Constant_Padding/1 0.264 ms 0.264 ms 3073 +Buddy_Opening2D_Constant_Padding/1 0.388 ms 0.388 ms 1855 +Buddy_Closing2D_Constant_Padding/1 0.374 ms 0.374 ms 2033 +Buddy_TopHat2D_Constant_Padding/1 0.911 ms 0.911 ms 727 +Buddy_BottomHat2D_Constant_Padding/1 0.897 ms 0.897 ms 732 +OpenCV_Erode2D_Constant_Padding/1 0.139 ms 0.139 ms 5021 +OpenCV_Opening2D_Constant_Padding/1 0.224 ms 0.224 ms 3154 +OpenCV_Closing2D_Constant_Padding/1 0.220 ms 0.220 ms 3178 +OpenCV_TopHat2D_Constant_Padding/1 0.254 ms 0.254 ms 2759 +OpenCV_BottomHat2D_Constant_Padding/1 0.257 ms 0.257 ms 2757 +OpenCV_MorphGrad2D_Constant_Padding/1 0.255 ms 0.255 ms 2734 +OpenCV_Dilate2D_Constant_Padding/1 0.139 ms 0.139 ms 5010 +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel5x5KernelAlign random3x3KernelAlignInt CONSTANT_PADDING +2025-05-26T22:20:51+00:00 +Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 2.06, 6.80, 8.24 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
+-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 11.8 ms 11.8 ms 60 +MLIR_Conv2D/1 29.9 ms 29.9 ms 23 +Buddy_Conv2D/1 1.30 ms 1.30 ms 524 +Buddy_Corr2D_Constant_Padding/1 2.37 ms 2.37 ms 294 +OpenCV_Filter2D_Constant_Padding/1 4.20 ms 4.20 ms 166 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.146 ms 0.146 ms 4766 +Buddy_Resize2D_Bilinear_Interpolation/1 0.266 ms 0.266 ms 2632 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 102992 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 48914 +Buddy_Erosion2D_Constant_Padding/1 0.223 ms 0.223 ms 3110 +Buddy_Dilation2D_Constant_Padding/1 0.226 ms 0.226 ms 3094 +Buddy_Opening2D_Constant_Padding/1 0.354 ms 0.354 ms 1992 +Buddy_Closing2D_Constant_Padding/1 0.348 ms 0.348 ms 1972 +Buddy_TopHat2D_Constant_Padding/1 0.907 ms 0.907 ms 731 +Buddy_BottomHat2D_Constant_Padding/1 0.905 ms 0.905 ms 740 +OpenCV_Erode2D_Constant_Padding/1 0.138 ms 0.138 ms 5054 +OpenCV_Opening2D_Constant_Padding/1 0.225 ms 0.225 ms 3141 +OpenCV_Closing2D_Constant_Padding/1 0.224 ms 0.224 ms 3119 +OpenCV_TopHat2D_Constant_Padding/1 0.289 ms 0.289 ms 2692 +OpenCV_BottomHat2D_Constant_Padding/1 0.261 ms 0.261 ms 2695 +OpenCV_MorphGrad2D_Constant_Padding/1 0.260 ms 0.260 ms 2680 +OpenCV_Dilate2D_Constant_Padding/1 0.139 ms 0.139 ms 5013 +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel5x5KernelAlign random3x3KernelAlignInt REPLICATE_PADDING +2025-05-26T22:21:15+00:00 +Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 2.04, 6.41, 8.07 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
+-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 11.8 ms 11.8 ms 59 +MLIR_Conv2D/1 29.9 ms 29.8 ms 24 +Buddy_Conv2D/1 1.31 ms 1.31 ms 542 +Buddy_Corr2D_Constant_Padding/1 2.38 ms 2.38 ms 293 +OpenCV_Filter2D_Constant_Padding/1 4.20 ms 4.20 ms 166 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.146 ms 0.146 ms 4777 +Buddy_Resize2D_Bilinear_Interpolation/1 0.266 ms 0.266 ms 2635 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 102983 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 48881 +Buddy_Erosion2D_Constant_Padding/1 0.221 ms 0.221 ms 3156 +Buddy_Dilation2D_Constant_Padding/1 0.222 ms 0.222 ms 3159 +Buddy_Opening2D_Constant_Padding/1 0.330 ms 0.330 ms 2113 +Buddy_Closing2D_Constant_Padding/1 0.334 ms 0.334 ms 2087 +Buddy_TopHat2D_Constant_Padding/1 0.877 ms 0.877 ms 736 +Buddy_BottomHat2D_Constant_Padding/1 0.891 ms 0.891 ms 740 +OpenCV_Erode2D_Constant_Padding/1 0.139 ms 0.139 ms 5026 +OpenCV_Opening2D_Constant_Padding/1 0.226 ms 0.226 ms 3102 +OpenCV_Closing2D_Constant_Padding/1 0.227 ms 0.227 ms 3074 +OpenCV_TopHat2D_Constant_Padding/1 0.258 ms 0.258 ms 2684 +OpenCV_BottomHat2D_Constant_Padding/1 0.263 ms 0.263 ms 2660 +OpenCV_MorphGrad2D_Constant_Padding/1 0.261 ms 0.261 ms 2685 +OpenCV_Dilate2D_Constant_Padding/1 0.140 ms 0.140 ms 4970 +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel7x7KernelAlign random3x3KernelAlignInt CONSTANT_PADDING +2025-05-26T22:21:40+00:00 +Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 2.02, 6.06, 7.91 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
+-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 22.2 ms 22.2 ms 31 +MLIR_Conv2D/1 68.1 ms 68.1 ms 10 +Buddy_Conv2D/1 2.32 ms 2.32 ms 300 +Buddy_Corr2D_Constant_Padding/1 4.70 ms 4.70 ms 148 +OpenCV_Filter2D_Constant_Padding/1 8.80 ms 8.80 ms 79 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.146 ms 0.146 ms 4765 +Buddy_Resize2D_Bilinear_Interpolation/1 0.266 ms 0.266 ms 2606 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 103232 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 48866 +Buddy_Erosion2D_Constant_Padding/1 0.223 ms 0.223 ms 3108 +Buddy_Dilation2D_Constant_Padding/1 0.222 ms 0.222 ms 3124 +Buddy_Opening2D_Constant_Padding/1 0.330 ms 0.330 ms 1961 +Buddy_Closing2D_Constant_Padding/1 0.350 ms 0.350 ms 2094 +Buddy_TopHat2D_Constant_Padding/1 0.899 ms 0.899 ms 764 +Buddy_BottomHat2D_Constant_Padding/1 0.881 ms 0.881 ms 784 +OpenCV_Erode2D_Constant_Padding/1 0.140 ms 0.140 ms 4984 +OpenCV_Opening2D_Constant_Padding/1 0.225 ms 0.225 ms 3103 +OpenCV_Closing2D_Constant_Padding/1 0.224 ms 0.224 ms 3089 +OpenCV_TopHat2D_Constant_Padding/1 0.257 ms 0.257 ms 2707 +OpenCV_BottomHat2D_Constant_Padding/1 0.258 ms 0.258 ms 2743 +OpenCV_MorphGrad2D_Constant_Padding/1 0.261 ms 0.261 ms 2695 +OpenCV_Dilate2D_Constant_Padding/1 0.140 ms 0.140 ms 4997 +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel7x7KernelAlign random3x3KernelAlignInt REPLICATE_PADDING +2025-05-26T22:22:04+00:00 +Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 2.01, 5.73, 7.75 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
+-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 22.0 ms 22.0 ms 32 +MLIR_Conv2D/1 68.1 ms 68.1 ms 10 +Buddy_Conv2D/1 2.22 ms 2.22 ms 304 +Buddy_Corr2D_Constant_Padding/1 4.70 ms 4.70 ms 149 +OpenCV_Filter2D_Constant_Padding/1 8.80 ms 8.80 ms 79 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.146 ms 0.146 ms 4719 +Buddy_Resize2D_Bilinear_Interpolation/1 0.266 ms 0.266 ms 2629 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 102880 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 48902 +Buddy_Erosion2D_Constant_Padding/1 0.222 ms 0.222 ms 3148 +Buddy_Dilation2D_Constant_Padding/1 0.223 ms 0.223 ms 3112 +Buddy_Opening2D_Constant_Padding/1 0.401 ms 0.401 ms 1703 +Buddy_Closing2D_Constant_Padding/1 0.370 ms 0.370 ms 1933 +Buddy_TopHat2D_Constant_Padding/1 0.940 ms 0.940 ms 724 +Buddy_BottomHat2D_Constant_Padding/1 0.904 ms 0.904 ms 704 +OpenCV_Erode2D_Constant_Padding/1 0.139 ms 0.139 ms 5045 +OpenCV_Opening2D_Constant_Padding/1 0.228 ms 0.228 ms 3039 +OpenCV_Closing2D_Constant_Padding/1 0.228 ms 0.228 ms 3127 +OpenCV_TopHat2D_Constant_Padding/1 0.261 ms 0.261 ms 2665 +OpenCV_BottomHat2D_Constant_Padding/1 0.261 ms 0.261 ms 2664 +OpenCV_MorphGrad2D_Constant_Padding/1 0.262 ms 0.262 ms 2666 +OpenCV_Dilate2D_Constant_Padding/1 0.141 ms 0.141 ms 4964 +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel9x9KernelAlign random3x3KernelAlignInt CONSTANT_PADDING +2025-05-26T22:22:28+00:00 +Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 2.01, 5.49, 7.63 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
+-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 35.2 ms 35.2 ms 20 +MLIR_Conv2D/1 122 ms 122 ms 6 +Buddy_Conv2D/1 4.21 ms 4.21 ms 167 +Buddy_Corr2D_Constant_Padding/1 7.90 ms 7.90 ms 89 +OpenCV_Filter2D_Constant_Padding/1 5.92 ms 5.92 ms 118 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.146 ms 0.146 ms 4760 +Buddy_Resize2D_Bilinear_Interpolation/1 0.267 ms 0.266 ms 2636 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 103065 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 47744 +Buddy_Erosion2D_Constant_Padding/1 0.220 ms 0.220 ms 3166 +Buddy_Dilation2D_Constant_Padding/1 0.220 ms 0.220 ms 3164 +Buddy_Opening2D_Constant_Padding/1 0.329 ms 0.329 ms 2083 +Buddy_Closing2D_Constant_Padding/1 0.331 ms 0.331 ms 2139 +Buddy_TopHat2D_Constant_Padding/1 0.877 ms 0.877 ms 739 +Buddy_BottomHat2D_Constant_Padding/1 0.863 ms 0.863 ms 755 OpenCV_Erode2D_Constant_Padding/1 0.139 ms 0.139 ms 5029 -OpenCV_Opening2D_Constant_Padding/1 0.226 ms 0.226 ms 3135 -OpenCV_Closing2D_Constant_Padding/1 0.224 ms 0.224 ms 3062 -OpenCV_TopHat2D_Constant_Padding/1 0.267 ms 0.267 ms 2642 -OpenCV_BottomHat2D_Constant_Padding/1 0.262 ms 0.262 ms 2674 -OpenCV_MorphGrad2D_Constant_Padding/1 0.260 ms 0.260 ms 2688 -OpenCV_Dilate2D_Constant_Padding/1 0.145 ms 0.145 ms 4937 -Running: ../benchmarks/ImageProcessing/Images/YuTu.png prewittKernelAlign random3x3KernelAlignInt REPLICATE_PADDING +OpenCV_Opening2D_Constant_Padding/1 0.220 ms 0.220 ms 3140 +OpenCV_Closing2D_Constant_Padding/1 0.222 ms 0.222 ms 3204 +OpenCV_TopHat2D_Constant_Padding/1 0.255 ms 0.255 ms 2744 +OpenCV_BottomHat2D_Constant_Padding/1 0.254 ms 0.254 ms 2737 +OpenCV_MorphGrad2D_Constant_Padding/1 0.255 ms 0.255 ms 2177 +OpenCV_Dilate2D_Constant_Padding/1 0.137 ms 0.137 ms 5097 +Running: 
../benchmarks/ImageProcessing/Images/YuTu.png sobel9x9KernelAlign random3x3KernelAlignInt REPLICATE_PADDING +2025-05-26T22:22:53+00:00 +Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 2.01, 5.21, 7.48 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 34.8 ms 34.8 ms 20 +MLIR_Conv2D/1 122 ms 122 ms 6 +Buddy_Conv2D/1 3.87 ms 3.87 ms 179 +Buddy_Corr2D_Constant_Padding/1 7.89 ms 7.89 ms 89 +OpenCV_Filter2D_Constant_Padding/1 5.91 ms 5.91 ms 118 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.146 ms 0.146 ms 4765 +Buddy_Resize2D_Bilinear_Interpolation/1 0.266 ms 0.266 ms 2629 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 102844 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 48492 +Buddy_Erosion2D_Constant_Padding/1 0.222 ms 0.222 ms 3127 +Buddy_Dilation2D_Constant_Padding/1 0.221 ms 0.221 ms 3155 +Buddy_Opening2D_Constant_Padding/1 0.349 ms 0.349 ms 2000 +Buddy_Closing2D_Constant_Padding/1 0.355 ms 0.355 ms 2036 +Buddy_TopHat2D_Constant_Padding/1 0.910 ms 0.910 ms 751 +Buddy_BottomHat2D_Constant_Padding/1 0.912 ms 0.912 ms 755 +OpenCV_Erode2D_Constant_Padding/1 0.140 ms 0.140 ms 5001 +OpenCV_Opening2D_Constant_Padding/1 0.220 ms 0.220 ms 3193 +OpenCV_Closing2D_Constant_Padding/1 0.219 ms 0.219 ms 3193 +OpenCV_TopHat2D_Constant_Padding/1 0.256 ms 0.256 ms 2734 +OpenCV_BottomHat2D_Constant_Padding/1 0.255 ms 0.255 ms 2725 +OpenCV_MorphGrad2D_Constant_Padding/1 0.256 ms 0.256 ms 2723 +OpenCV_Dilate2D_Constant_Padding/1 0.138 ms 0.138 ms 5067 +Running: 
../benchmarks/ImageProcessing/Images/YuTu.png laplacianKernelAlign random3x3KernelAlignInt CONSTANT_PADDING +2025-05-26T22:23:17+00:00 +Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 2.00, 4.95, 7.33 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 4.86 ms 4.86 ms 144 +MLIR_Conv2D/1 7.37 ms 7.37 ms 95 +Buddy_Conv2D/1 0.420 ms 0.420 ms 1663 +Buddy_Corr2D_Constant_Padding/1 1.08 ms 1.08 ms 651 +OpenCV_Filter2D_Constant_Padding/1 1.90 ms 1.90 ms 367 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.146 ms 0.146 ms 4748 +Buddy_Resize2D_Bilinear_Interpolation/1 0.266 ms 0.266 ms 2632 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 102984 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 48927 +Buddy_Erosion2D_Constant_Padding/1 0.224 ms 0.224 ms 3155 +Buddy_Dilation2D_Constant_Padding/1 0.222 ms 0.222 ms 3149 +Buddy_Opening2D_Constant_Padding/1 0.360 ms 0.360 ms 2068 +Buddy_Closing2D_Constant_Padding/1 0.356 ms 0.356 ms 1957 +Buddy_TopHat2D_Constant_Padding/1 0.936 ms 0.936 ms 731 +Buddy_BottomHat2D_Constant_Padding/1 0.919 ms 0.919 ms 729 +OpenCV_Erode2D_Constant_Padding/1 0.139 ms 0.139 ms 5005 +OpenCV_Opening2D_Constant_Padding/1 0.228 ms 0.228 ms 3076 +OpenCV_Closing2D_Constant_Padding/1 0.223 ms 0.223 ms 3193 +OpenCV_TopHat2D_Constant_Padding/1 0.255 ms 0.255 ms 2760 +OpenCV_BottomHat2D_Constant_Padding/1 0.256 ms 0.256 ms 2736 +OpenCV_MorphGrad2D_Constant_Padding/1 0.256 ms 0.256 ms 2716 +OpenCV_Dilate2D_Constant_Padding/1 0.141 ms 0.141 ms 4963 +Running: 
../benchmarks/ImageProcessing/Images/YuTu.png laplacianKernelAlign random3x3KernelAlignInt REPLICATE_PADDING +2025-05-26T22:23:41+00:00 +Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 2.00, 4.71, 7.19 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 4.87 ms 4.87 ms 143 +MLIR_Conv2D/1 7.37 ms 7.37 ms 95 +Buddy_Conv2D/1 0.425 ms 0.425 ms 1657 +corrupted double-linked list +Running: ../benchmarks/ImageProcessing/Images/YuTu.png logKernelAlign random3x3KernelAlignInt CONSTANT_PADDING +2025-05-26T22:23:45+00:00 +Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 2.00, 4.66, 7.16 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
+-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 11.8 ms 11.8 ms 59 +MLIR_Conv2D/1 29.9 ms 29.9 ms 24 +Buddy_Conv2D/1 1.02 ms 1.02 ms 751 +Buddy_Corr2D_Constant_Padding/1 1.79 ms 1.79 ms 391 +OpenCV_Filter2D_Constant_Padding/1 2.74 ms 2.74 ms 255 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.146 ms 0.146 ms 4778 +Buddy_Resize2D_Bilinear_Interpolation/1 0.266 ms 0.266 ms 2630 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 103033 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.016 ms 0.016 ms 48860 +Buddy_Erosion2D_Constant_Padding/1 0.225 ms 0.225 ms 3134 +Buddy_Dilation2D_Constant_Padding/1 0.226 ms 0.226 ms 2971 +Buddy_Opening2D_Constant_Padding/1 0.347 ms 0.347 ms 1881 +Buddy_Closing2D_Constant_Padding/1 0.348 ms 0.348 ms 2009 +Buddy_TopHat2D_Constant_Padding/1 0.927 ms 0.927 ms 714 +Buddy_BottomHat2D_Constant_Padding/1 0.931 ms 0.931 ms 723 +OpenCV_Erode2D_Constant_Padding/1 0.141 ms 0.141 ms 4965 +OpenCV_Opening2D_Constant_Padding/1 0.222 ms 0.222 ms 3088 +OpenCV_Closing2D_Constant_Padding/1 0.224 ms 0.224 ms 3129 +OpenCV_TopHat2D_Constant_Padding/1 0.264 ms 0.264 ms 2625 +OpenCV_BottomHat2D_Constant_Padding/1 0.269 ms 0.269 ms 2625 +OpenCV_MorphGrad2D_Constant_Padding/1 0.260 ms 0.260 ms 2692 +OpenCV_Dilate2D_Constant_Padding/1 0.138 ms 0.138 ms 5053 +Running: ../benchmarks/ImageProcessing/Images/YuTu.png logKernelAlign random3x3KernelAlignInt REPLICATE_PADDING +2025-05-26T22:24:09+00:00 +Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 2.06, 4.46, 7.03 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
+-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 11.9 ms 11.9 ms 60 +MLIR_Conv2D/1 29.9 ms 29.9 ms 24 +Buddy_Conv2D/1 1.01 ms 1.01 ms 738 +Buddy_Corr2D_Constant_Padding/1 1.78 ms 1.78 ms 393 +OpenCV_Filter2D_Constant_Padding/1 2.75 ms 2.75 ms 255 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.146 ms 0.146 ms 4756 +Buddy_Resize2D_Bilinear_Interpolation/1 0.266 ms 0.266 ms 2630 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 103027 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 48458 +Buddy_Erosion2D_Constant_Padding/1 0.222 ms 0.222 ms 3152 +Buddy_Dilation2D_Constant_Padding/1 0.222 ms 0.222 ms 3142 +Buddy_Opening2D_Constant_Padding/1 0.334 ms 0.334 ms 2069 +Buddy_Closing2D_Constant_Padding/1 0.332 ms 0.332 ms 2004 +Buddy_TopHat2D_Constant_Padding/1 0.916 ms 0.916 ms 753 +Buddy_BottomHat2D_Constant_Padding/1 0.871 ms 0.871 ms 762 +OpenCV_Erode2D_Constant_Padding/1 0.141 ms 0.141 ms 4961 +OpenCV_Opening2D_Constant_Padding/1 0.220 ms 0.220 ms 3152 +OpenCV_Closing2D_Constant_Padding/1 0.228 ms 0.228 ms 2470 +OpenCV_TopHat2D_Constant_Padding/1 0.257 ms 0.257 ms 2673 +OpenCV_BottomHat2D_Constant_Padding/1 0.258 ms 0.258 ms 2749 +OpenCV_MorphGrad2D_Constant_Padding/1 0.257 ms 0.257 ms 2717 +OpenCV_Dilate2D_Constant_Padding/1 0.138 ms 0.138 ms 5082 +Testing AVX512 support +CPU does not support AVX512. +Testing NEON support +CPU does not support NEON. 
From 8f6e489f49f5e8b7306b462c8180b79675ec9f7d Mon Sep 17 00:00:00 2001 From: Q Liu <52538137+LIUQyou@users.noreply.github.com> Date: Tue, 27 May 2025 00:36:32 +0200 Subject: [PATCH 21/52] update web --- site/deeplearning/dl-layer-ffn-benchmark.html | 2 +- .../dl-layer-rmsnorm-benchmark.html | 2 +- .../dl-layer-selfattention-benchmark.html | 2 +- .../dl-model-lenet-benchmark.html | 2 +- .../dl-model-mobilenetv3-benchmark.html | 2 +- .../dl-model-resnet18-benchmark.html | 2 +- .../dl-model-tinyllama-benchmark.html | 2 +- .../dl-model-whisper-benchmark.html | 2 +- .../dl-op-linalg-arithaddf-benchmark.html | 2 +- .../dl-op-linalg-arithdivf-benchmark.html | 2 +- .../dl-op-linalg-arithmulf-benchmark.html | 2 +- .../dl-op-linalg-arithnegf-benchmark.html | 2 +- .../dl-op-linalg-arithsubf-benchmark.html | 2 +- .../dl-op-linalg-batch-matmul-benchmark.html | 2 +- ...-op-linalg-conv2d-nchw-fchw-benchmark.html | 2 +- ...-op-linalg-conv2d-nhwc-fhwc-benchmark.html | 2 +- ...-op-linalg-conv2d-nhwc-hwcf-benchmark.html | 2 +- ...-depthwise-conv-2d-nhwc-hwc-benchmark.html | 2 +- .../dl-op-linalg-mathexp-benchmark.html | 2 +- .../dl-op-linalg-mathfpow-benchmark.html | 2 +- .../dl-op-linalg-mathrsqrt-benchmark.html | 2 +- .../dl-op-linalg-matmul-benchmark.html | 2 +- ...-op-linalg-pooling-nhwc-sum-benchmark.html | 2 +- ...-linalg-softmax-exp-sum-div-benchmark.html | 2 +- .../dl-op-matmul-transpose-b-benchmark.html | 2 +- .../dl-op-tosa-transpose-benchmark.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 36 +++++++++ ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 36 +++++++++ ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- 
...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- site/index.html | 66 +++++++++++++++ site/vectorization/vectorization_matrix.html | 2 +- .../vectorization/vectorization_result.log | 0 .../build_results_crosscompile_summary.log | 0 .../deeplearning/build_results_summary.log | 0 .../deeplearning/dl-layer-ffn-benchmark.json | 0 .../deeplearning/dl-layer-ffn-benchmark.log | 0 .../dl-layer-rmsnorm-benchmark.json | 0 .../dl-layer-rmsnorm-benchmark.log | 0 .../dl-layer-selfattention-benchmark.json | 0 .../dl-layer-selfattention-benchmark.log | 0 .../dl-model-lenet-benchmark.json | 0 .../deeplearning/dl-model-lenet-benchmark.log | 0 .../dl-model-mobilenetv3-benchmark.json | 0 .../dl-model-mobilenetv3-benchmark.log | 0 .../dl-model-resnet18-benchmark.json | 0 .../dl-model-resnet18-benchmark.log | 0 .../dl-model-tinyllama-benchmark.json | 0 .../dl-model-tinyllama-benchmark.log | 0 .../dl-model-whisper-benchmark.json | 0 .../dl-model-whisper-benchmark.log | 0 .../dl-op-linalg-arithaddf-benchmark.json | 0 .../dl-op-linalg-arithaddf-benchmark.log | 0 
.../dl-op-linalg-arithdivf-benchmark.json | 0 .../dl-op-linalg-arithdivf-benchmark.log | 0 .../dl-op-linalg-arithmulf-benchmark.json | 0 .../dl-op-linalg-arithmulf-benchmark.log | 0 .../dl-op-linalg-arithnegf-benchmark.json | 0 .../dl-op-linalg-arithnegf-benchmark.log | 0 .../dl-op-linalg-arithsubf-benchmark.json | 0 .../dl-op-linalg-arithsubf-benchmark.log | 0 .../dl-op-linalg-batch-matmul-benchmark.json | 0 .../dl-op-linalg-batch-matmul-benchmark.log | 0 ...-op-linalg-conv2d-nchw-fchw-benchmark.json | 0 ...l-op-linalg-conv2d-nchw-fchw-benchmark.log | 0 ...-op-linalg-conv2d-nhwc-fhwc-benchmark.json | 0 ...l-op-linalg-conv2d-nhwc-fhwc-benchmark.log | 0 ...-op-linalg-conv2d-nhwc-hwcf-benchmark.json | 0 ...l-op-linalg-conv2d-nhwc-hwcf-benchmark.log | 0 ...-depthwise-conv-2d-nhwc-hwc-benchmark.json | 0 ...g-depthwise-conv-2d-nhwc-hwc-benchmark.log | 0 .../dl-op-linalg-mathexp-benchmark.json | 0 .../dl-op-linalg-mathexp-benchmark.log | 0 .../dl-op-linalg-mathfpow-benchmark.json | 0 .../dl-op-linalg-mathfpow-benchmark.log | 0 .../dl-op-linalg-mathrsqrt-benchmark.json | 0 .../dl-op-linalg-mathrsqrt-benchmark.log | 0 .../dl-op-linalg-matmul-benchmark.json | 0 .../dl-op-linalg-matmul-benchmark.log | 0 ...-op-linalg-pooling-nhwc-sum-benchmark.json | 0 ...l-op-linalg-pooling-nhwc-sum-benchmark.log | 0 .../dl-op-linalg-reduceaddf-benchmark.log | 0 .../dl-op-linalg-reducemaxf-benchmark.log | 0 ...-linalg-softmax-exp-sum-div-benchmark.json | 0 ...p-linalg-softmax-exp-sum-div-benchmark.log | 0 .../dl-op-matmul-transpose-b-benchmark.json | 0 .../dl-op-matmul-transpose-b-benchmark.log | 0 .../dl-op-tosa-transpose-benchmark.json | 0 .../dl-op-tosa-transpose-benchmark.log | 0 .../deeplearning/run_results_summary.log | 0 ...dom3x3KernelAlignInt_CONSTANT_PADDING.json | 0 ...om3x3KernelAlignInt_REPLICATE_PADDING.json | 80 ------------------- ...dom3x3KernelAlignInt_CONSTANT_PADDING.json | 0 ...om3x3KernelAlignInt_REPLICATE_PADDING.json | 0 ...dom3x3KernelAlignInt_CONSTANT_PADDING.json 
| 0 ...om3x3KernelAlignInt_REPLICATE_PADDING.json | 0 ...dom3x3KernelAlignInt_CONSTANT_PADDING.json | 0 ...om3x3KernelAlignInt_REPLICATE_PADDING.json | 0 ...dom3x3KernelAlignInt_CONSTANT_PADDING.json | 0 ...om3x3KernelAlignInt_REPLICATE_PADDING.json | 0 ...dom3x3KernelAlignInt_CONSTANT_PADDING.json | 0 ...om3x3KernelAlignInt_REPLICATE_PADDING.json | 0 ...dom3x3KernelAlignInt_CONSTANT_PADDING.json | 0 ...om3x3KernelAlignInt_REPLICATE_PADDING.json | 0 ...dom3x3KernelAlignInt_CONSTANT_PADDING.json | 0 ...om3x3KernelAlignInt_REPLICATE_PADDING.json | 0 ...dom3x3KernelAlignInt_CONSTANT_PADDING.json | 0 ...om3x3KernelAlignInt_REPLICATE_PADDING.json | 0 ...dom3x3KernelAlignInt_CONSTANT_PADDING.json | 0 ...om3x3KernelAlignInt_REPLICATE_PADDING.json | 0 ...dom3x3KernelAlignInt_CONSTANT_PADDING.json | 0 ...om3x3KernelAlignInt_REPLICATE_PADDING.json | 0 ...dom3x3KernelAlignInt_CONSTANT_PADDING.json | 0 ...om3x3KernelAlignInt_REPLICATE_PADDING.json | 0 ...dom3x3KernelAlignInt_CONSTANT_PADDING.json | 0 ...om3x3KernelAlignInt_REPLICATE_PADDING.json | 0 ...dom3x3KernelAlignInt_CONSTANT_PADDING.json | 0 ...om3x3KernelAlignInt_REPLICATE_PADDING.json | 0 .../image-processing-result.log | 0 .../vectorization/vectorization_matrix.json | 0 143 files changed, 190 insertions(+), 132 deletions(-) mode change 100644 => 100755 site/deeplearning/dl-layer-ffn-benchmark.html mode change 100644 => 100755 site/deeplearning/dl-layer-rmsnorm-benchmark.html mode change 100644 => 100755 site/deeplearning/dl-layer-selfattention-benchmark.html mode change 100644 => 100755 site/deeplearning/dl-model-lenet-benchmark.html mode change 100644 => 100755 site/deeplearning/dl-model-mobilenetv3-benchmark.html mode change 100644 => 100755 site/deeplearning/dl-model-resnet18-benchmark.html mode change 100644 => 100755 site/deeplearning/dl-model-tinyllama-benchmark.html mode change 100644 => 100755 site/deeplearning/dl-model-whisper-benchmark.html mode change 100644 => 100755 
site/deeplearning/dl-op-linalg-arithaddf-benchmark.html mode change 100644 => 100755 site/deeplearning/dl-op-linalg-arithdivf-benchmark.html mode change 100644 => 100755 site/deeplearning/dl-op-linalg-arithmulf-benchmark.html mode change 100644 => 100755 site/deeplearning/dl-op-linalg-arithnegf-benchmark.html mode change 100644 => 100755 site/deeplearning/dl-op-linalg-arithsubf-benchmark.html mode change 100644 => 100755 site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html mode change 100644 => 100755 site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html mode change 100644 => 100755 site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html mode change 100644 => 100755 site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html mode change 100644 => 100755 site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html mode change 100644 => 100755 site/deeplearning/dl-op-linalg-mathexp-benchmark.html mode change 100644 => 100755 site/deeplearning/dl-op-linalg-mathfpow-benchmark.html mode change 100644 => 100755 site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html mode change 100644 => 100755 site/deeplearning/dl-op-linalg-matmul-benchmark.html mode change 100644 => 100755 site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html mode change 100644 => 100755 site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html mode change 100644 => 100755 site/deeplearning/dl-op-matmul-transpose-b-benchmark.html mode change 100644 => 100755 site/deeplearning/dl-op-tosa-transpose-benchmark.html mode change 100644 => 100755 site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html create mode 100644 site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html create mode 100644 site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html mode change 100644 => 100755 
site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html mode change 100644 => 100755 site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html mode change 100644 => 100755 site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html mode change 100644 => 100755 site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html mode change 100644 => 100755 site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html mode change 100644 => 100755 site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html mode change 100644 => 100755 site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html mode change 100644 => 100755 site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html mode change 100644 => 100755 site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html mode change 100644 => 100755 site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html mode change 100644 => 100755 site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html mode change 100644 => 100755 site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html mode change 100644 => 100755 site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html mode change 100644 => 100755 site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html mode change 100644 => 100755 site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html mode change 100644 => 100755 site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html mode change 100644 => 
100755 site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html mode change 100644 => 100755 site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html mode change 100644 => 100755 site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html mode change 100644 => 100755 site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html mode change 100644 => 100755 site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html mode change 100644 => 100755 site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html mode change 100644 => 100755 site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html mode change 100644 => 100755 site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html create mode 100644 site/index.html mode change 100644 => 100755 site/vectorization/vectorization_matrix.html mode change 100644 => 100755 test/test_result/vectorization/vectorization_result.log mode change 100644 => 100755 test_result/deeplearning/build_results_crosscompile_summary.log mode change 100644 => 100755 test_result/deeplearning/build_results_summary.log mode change 100644 => 100755 test_result/deeplearning/dl-layer-ffn-benchmark.json mode change 100644 => 100755 test_result/deeplearning/dl-layer-ffn-benchmark.log mode change 100644 => 100755 test_result/deeplearning/dl-layer-rmsnorm-benchmark.json mode change 100644 => 100755 test_result/deeplearning/dl-layer-rmsnorm-benchmark.log mode change 100644 => 100755 test_result/deeplearning/dl-layer-selfattention-benchmark.json mode change 100644 => 100755 test_result/deeplearning/dl-layer-selfattention-benchmark.log mode change 100644 => 100755 test_result/deeplearning/dl-model-lenet-benchmark.json mode change 100644 => 100755 
test_result/deeplearning/dl-model-lenet-benchmark.log mode change 100644 => 100755 test_result/deeplearning/dl-model-mobilenetv3-benchmark.json mode change 100644 => 100755 test_result/deeplearning/dl-model-mobilenetv3-benchmark.log mode change 100644 => 100755 test_result/deeplearning/dl-model-resnet18-benchmark.json mode change 100644 => 100755 test_result/deeplearning/dl-model-resnet18-benchmark.log mode change 100644 => 100755 test_result/deeplearning/dl-model-tinyllama-benchmark.json mode change 100644 => 100755 test_result/deeplearning/dl-model-tinyllama-benchmark.log mode change 100644 => 100755 test_result/deeplearning/dl-model-whisper-benchmark.json mode change 100644 => 100755 test_result/deeplearning/dl-model-whisper-benchmark.log mode change 100644 => 100755 test_result/deeplearning/dl-op-linalg-arithaddf-benchmark.json mode change 100644 => 100755 test_result/deeplearning/dl-op-linalg-arithaddf-benchmark.log mode change 100644 => 100755 test_result/deeplearning/dl-op-linalg-arithdivf-benchmark.json mode change 100644 => 100755 test_result/deeplearning/dl-op-linalg-arithdivf-benchmark.log mode change 100644 => 100755 test_result/deeplearning/dl-op-linalg-arithmulf-benchmark.json mode change 100644 => 100755 test_result/deeplearning/dl-op-linalg-arithmulf-benchmark.log mode change 100644 => 100755 test_result/deeplearning/dl-op-linalg-arithnegf-benchmark.json mode change 100644 => 100755 test_result/deeplearning/dl-op-linalg-arithnegf-benchmark.log mode change 100644 => 100755 test_result/deeplearning/dl-op-linalg-arithsubf-benchmark.json mode change 100644 => 100755 test_result/deeplearning/dl-op-linalg-arithsubf-benchmark.log mode change 100644 => 100755 test_result/deeplearning/dl-op-linalg-batch-matmul-benchmark.json mode change 100644 => 100755 test_result/deeplearning/dl-op-linalg-batch-matmul-benchmark.log mode change 100644 => 100755 test_result/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.json mode change 100644 => 100755 
test_result/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.log mode change 100644 => 100755 test_result/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json mode change 100644 => 100755 test_result/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.log mode change 100644 => 100755 test_result/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json mode change 100644 => 100755 test_result/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.log mode change 100644 => 100755 test_result/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json mode change 100644 => 100755 test_result/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.log mode change 100644 => 100755 test_result/deeplearning/dl-op-linalg-mathexp-benchmark.json mode change 100644 => 100755 test_result/deeplearning/dl-op-linalg-mathexp-benchmark.log mode change 100644 => 100755 test_result/deeplearning/dl-op-linalg-mathfpow-benchmark.json mode change 100644 => 100755 test_result/deeplearning/dl-op-linalg-mathfpow-benchmark.log mode change 100644 => 100755 test_result/deeplearning/dl-op-linalg-mathrsqrt-benchmark.json mode change 100644 => 100755 test_result/deeplearning/dl-op-linalg-mathrsqrt-benchmark.log mode change 100644 => 100755 test_result/deeplearning/dl-op-linalg-matmul-benchmark.json mode change 100644 => 100755 test_result/deeplearning/dl-op-linalg-matmul-benchmark.log mode change 100644 => 100755 test_result/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.json mode change 100644 => 100755 test_result/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.log mode change 100644 => 100755 test_result/deeplearning/dl-op-linalg-reduceaddf-benchmark.log mode change 100644 => 100755 test_result/deeplearning/dl-op-linalg-reducemaxf-benchmark.log mode change 100644 => 100755 test_result/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.json mode change 100644 => 100755 test_result/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.log mode change 
100644 => 100755 test_result/deeplearning/dl-op-matmul-transpose-b-benchmark.json mode change 100644 => 100755 test_result/deeplearning/dl-op-matmul-transpose-b-benchmark.log mode change 100644 => 100755 test_result/deeplearning/dl-op-tosa-transpose-benchmark.json mode change 100644 => 100755 test_result/deeplearning/dl-op-tosa-transpose-benchmark.log mode change 100644 => 100755 test_result/deeplearning/run_results_summary.log mode change 100644 => 100755 test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json delete mode 100644 test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json mode change 100644 => 100755 test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json mode change 100644 => 100755 test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json mode change 100644 => 100755 test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json mode change 100644 => 100755 test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json mode change 100644 => 100755 test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json mode change 100644 => 100755 test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json mode change 100644 => 100755 test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json mode change 100644 => 100755 test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json mode change 100644 => 100755 test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json mode change 100644 => 100755 
test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json mode change 100644 => 100755 test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json mode change 100644 => 100755 test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json mode change 100644 => 100755 test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json mode change 100644 => 100755 test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json mode change 100644 => 100755 test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json mode change 100644 => 100755 test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json mode change 100644 => 100755 test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json mode change 100644 => 100755 test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json mode change 100644 => 100755 test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json mode change 100644 => 100755 test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json mode change 100644 => 100755 test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json mode change 100644 => 100755 test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json mode change 100644 => 100755 test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json mode change 100644 => 100755 test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json mode change 100644 => 100755 
test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json mode change 100644 => 100755 test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json mode change 100644 => 100755 test_result/imageprocessing/image-processing-result.log mode change 100644 => 100755 test_result/vectorization/vectorization_matrix.json diff --git a/site/deeplearning/dl-layer-ffn-benchmark.html b/site/deeplearning/dl-layer-ffn-benchmark.html old mode 100644 new mode 100755 index 7d85a5c2..867d8c52 --- a/site/deeplearning/dl-layer-ffn-benchmark.html +++ b/site/deeplearning/dl-layer-ffn-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-layer-ffn-benchmark.json

        2025-05-26 22:24:35 UTC

        +

        deeplearning/dl-layer-ffn-benchmark.json

        2025-05-26 22:36:20 UTC

        dl-layer-ffn-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        MLIR_MatMul/119.519.535685429
        diff --git a/site/deeplearning/dl-layer-rmsnorm-benchmark.html b/site/deeplearning/dl-layer-rmsnorm-benchmark.html old mode 100644 new mode 100755 index e128e7b3..b2345de8 --- a/site/deeplearning/dl-layer-rmsnorm-benchmark.html +++ b/site/deeplearning/dl-layer-rmsnorm-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-layer-rmsnorm-benchmark.json

        2025-05-26 22:24:35 UTC

        +

        deeplearning/dl-layer-rmsnorm-benchmark.json

        2025-05-26 22:36:20 UTC

        dl-layer-rmsnorm-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        DL_LAYER_FFN/Scalar0.10.110758
        diff --git a/site/deeplearning/dl-layer-selfattention-benchmark.html b/site/deeplearning/dl-layer-selfattention-benchmark.html old mode 100644 new mode 100755 index 0769c15a..5db4dcdd --- a/site/deeplearning/dl-layer-selfattention-benchmark.html +++ b/site/deeplearning/dl-layer-selfattention-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-layer-selfattention-benchmark.json

        2025-05-26 22:24:35 UTC

        +

        deeplearning/dl-layer-selfattention-benchmark.json

        2025-05-26 22:36:20 UTC

        dl-layer-selfattention-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        DL_LAYER_RMSNORM/Scalar0.00.0358748
        diff --git a/site/deeplearning/dl-model-lenet-benchmark.html b/site/deeplearning/dl-model-lenet-benchmark.html old mode 100644 new mode 100755 index 447e4139..c592c662 --- a/site/deeplearning/dl-model-lenet-benchmark.html +++ b/site/deeplearning/dl-model-lenet-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-model-lenet-benchmark.json

        2025-05-26 22:24:35 UTC

        +

        deeplearning/dl-model-lenet-benchmark.json

        2025-05-26 22:36:20 UTC

        dl-model-lenet-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        DL_LAYER_ATTENTION/Scalar4.74.7148
        diff --git a/site/deeplearning/dl-model-mobilenetv3-benchmark.html b/site/deeplearning/dl-model-mobilenetv3-benchmark.html old mode 100644 new mode 100755 index 32bba417..bb473f6d --- a/site/deeplearning/dl-model-mobilenetv3-benchmark.html +++ b/site/deeplearning/dl-model-mobilenetv3-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-model-mobilenetv3-benchmark.json

        2025-05-26 22:24:35 UTC

        +

        deeplearning/dl-model-mobilenetv3-benchmark.json

        2025-05-26 22:36:20 UTC

        dl-model-mobilenetv3-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        DL_MODEL_LENET/Auto_Vectorization0.20.24396
        diff --git a/site/deeplearning/dl-model-resnet18-benchmark.html b/site/deeplearning/dl-model-resnet18-benchmark.html old mode 100644 new mode 100755 index 416bf9e6..33b3cd6a --- a/site/deeplearning/dl-model-resnet18-benchmark.html +++ b/site/deeplearning/dl-model-resnet18-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-model-resnet18-benchmark.json

        2025-05-26 22:24:35 UTC

        +

        deeplearning/dl-model-resnet18-benchmark.json

        2025-05-26 22:36:20 UTC

        dl-model-resnet18-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        BM_MobileNet_V3/BM_MobileNet_V3_scalar35.335.320
        diff --git a/site/deeplearning/dl-model-tinyllama-benchmark.html b/site/deeplearning/dl-model-tinyllama-benchmark.html old mode 100644 new mode 100755 index 36702c42..d0a5bb1a --- a/site/deeplearning/dl-model-tinyllama-benchmark.html +++ b/site/deeplearning/dl-model-tinyllama-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-model-tinyllama-benchmark.json

        2025-05-26 22:24:35 UTC

        +

        deeplearning/dl-model-tinyllama-benchmark.json

        2025-05-26 22:36:20 UTC

        dl-model-tinyllama-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        DL_MODEL_Resnet18/Auto_Vectorization725.8717.91
        diff --git a/site/deeplearning/dl-model-whisper-benchmark.html b/site/deeplearning/dl-model-whisper-benchmark.html old mode 100644 new mode 100755 index 2d6b6b4d..744b14f0 --- a/site/deeplearning/dl-model-whisper-benchmark.html +++ b/site/deeplearning/dl-model-whisper-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-model-whisper-benchmark.json

        2025-05-26 22:24:35 UTC

        +

        deeplearning/dl-model-whisper-benchmark.json

        2025-05-26 22:36:20 UTC

        dl-model-whisper-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        DL_MODEL_TINYLLAMA/scalar145312.6145306.01
        diff --git a/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html b/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html old mode 100644 new mode 100755 index 84449327..7a497e07 --- a/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-op-linalg-arithaddf-benchmark.json

        2025-05-26 22:24:35 UTC

        +

        deeplearning/dl-op-linalg-arithaddf-benchmark.json

        2025-05-26 22:36:20 UTC

        dl-op-linalg-arithaddf-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        DL_MODEL_Whisper/Auto_Vectorization80864.480855.31
        diff --git a/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html b/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html old mode 100644 new mode 100755 index 36a83ac5..fb81cf4a --- a/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-op-linalg-arithdivf-benchmark.json

        2025-05-26 22:24:35 UTC

        +

        deeplearning/dl-op-linalg-arithdivf-benchmark.json

        2025-05-26 22:36:20 UTC

        dl-op-linalg-arithdivf-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        BM_ADDF_SCALAR0.00.023951
        diff --git a/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html b/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html old mode 100644 new mode 100755 index 43877a2c..f9f8f7a4 --- a/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-op-linalg-arithmulf-benchmark.json

        2025-05-26 22:24:35 UTC

        +

        deeplearning/dl-op-linalg-arithmulf-benchmark.json

        2025-05-26 22:36:20 UTC

        dl-op-linalg-arithmulf-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        BM_DIVF_SCALAR0.00.022508
        diff --git a/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html b/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html old mode 100644 new mode 100755 index b6337d45..8837c4cc --- a/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-op-linalg-arithnegf-benchmark.json

        2025-05-26 22:24:35 UTC

        +

        deeplearning/dl-op-linalg-arithnegf-benchmark.json

        2025-05-26 22:36:20 UTC

        dl-op-linalg-arithnegf-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        BM_MULF_SCALAR0.00.023392
        diff --git a/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html b/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html old mode 100644 new mode 100755 index 45b665ab..402d3b42 --- a/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-op-linalg-arithsubf-benchmark.json

        2025-05-26 22:24:35 UTC

        +

        deeplearning/dl-op-linalg-arithsubf-benchmark.json

        2025-05-26 22:36:20 UTC

        dl-op-linalg-arithsubf-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        BM_NEGF_SCALAR0.00.030765
        diff --git a/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html b/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html old mode 100644 new mode 100755 index 0d496648..70248e97 --- a/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html +++ b/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-op-linalg-batch-matmul-benchmark.json

        2025-05-26 22:24:35 UTC

        +

        deeplearning/dl-op-linalg-batch-matmul-benchmark.json

        2025-05-26 22:36:20 UTC

        dl-op-linalg-batch-matmul-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        BM_SUBF_SCALAR0.00.023979
        diff --git a/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html b/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html old mode 100644 new mode 100755 index 97594737..ade6f089 --- a/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html +++ b/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.json

        2025-05-26 22:24:35 UTC

        +

        deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.json

        2025-05-26 22:36:20 UTC

        dl-op-linalg-conv2d-nchw-fchw-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        DL_OPS_BATCH_MATMUL/Scalar/iterations:13551.43551.01
        diff --git a/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html b/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html old mode 100644 new mode 100755 index db78c360..36d1f3d8 --- a/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html +++ b/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json

        2025-05-26 22:24:35 UTC

        +

        deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json

        2025-05-26 22:36:20 UTC

        dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        BM_Conv2DNchwFchw_SCALAR282.0282.02
        diff --git a/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html b/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html old mode 100644 new mode 100755 index 1ba3701d..ce5e44dd --- a/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json

        2025-05-26 22:24:35 UTC

        +

        deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json

        2025-05-26 22:36:20 UTC

        dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        DL_OPS_CONV_2D_NHWC_FHWC/scalar/iterations:573.773.65
        diff --git a/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html b/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html old mode 100644 new mode 100755 index 00478064..451d8869 --- a/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html +++ b/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json

        2025-05-26 22:24:35 UTC

        +

        deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json

        2025-05-26 22:36:20 UTC

        dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        BM_CONV_2D_NHWC_HWCF_SCALAR32.632.622
        diff --git a/site/deeplearning/dl-op-linalg-mathexp-benchmark.html b/site/deeplearning/dl-op-linalg-mathexp-benchmark.html old mode 100644 new mode 100755 index c4bd5ff7..f61f291c --- a/site/deeplearning/dl-op-linalg-mathexp-benchmark.html +++ b/site/deeplearning/dl-op-linalg-mathexp-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-op-linalg-mathexp-benchmark.json

        2025-05-26 22:24:35 UTC

        +

        deeplearning/dl-op-linalg-mathexp-benchmark.json

        2025-05-26 22:36:20 UTC

        dl-op-linalg-mathexp-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/scalar/iterations:54.84.85
        diff --git a/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html b/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html old mode 100644 new mode 100755 index 6a950c67..33a2f718 --- a/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html +++ b/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-op-linalg-mathfpow-benchmark.json

        2025-05-26 22:24:35 UTC

        +

        deeplearning/dl-op-linalg-mathfpow-benchmark.json

        2025-05-26 22:36:20 UTC

        dl-op-linalg-mathfpow-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        BM_EXP_SCALAR0.00.015289
        diff --git a/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html b/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html old mode 100644 new mode 100755 index d00ce7b4..6fa9e8ff --- a/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html +++ b/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-op-linalg-mathrsqrt-benchmark.json

        2025-05-26 22:24:35 UTC

        +

        deeplearning/dl-op-linalg-mathrsqrt-benchmark.json

        2025-05-26 22:36:20 UTC

        dl-op-linalg-mathrsqrt-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        BM_FPOW_SCALAR0.10.18096
        diff --git a/site/deeplearning/dl-op-linalg-matmul-benchmark.html b/site/deeplearning/dl-op-linalg-matmul-benchmark.html old mode 100644 new mode 100755 index 077ca934..85290002 --- a/site/deeplearning/dl-op-linalg-matmul-benchmark.html +++ b/site/deeplearning/dl-op-linalg-matmul-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-op-linalg-matmul-benchmark.json

        2025-05-26 22:24:35 UTC

        +

        deeplearning/dl-op-linalg-matmul-benchmark.json

        2025-05-26 22:36:20 UTC

        dl-op-linalg-matmul-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        BM_RSQRT_SCALAR0.10.19407
        diff --git a/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html b/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html old mode 100644 new mode 100755 index ff4fd434..578dc53c --- a/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html +++ b/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.json

        2025-05-26 22:24:35 UTC

        +

        deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.json

        2025-05-26 22:36:20 UTC

        dl-op-linalg-pooling-nhwc-sum-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        DL_OPS_MATMUL/scalar_O0/iterations:14219.84219.71
        diff --git a/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html b/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html old mode 100644 new mode 100755 index e50403bd..08d70811 --- a/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html +++ b/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.json

        2025-05-26 22:24:35 UTC

        +

        deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.json

        2025-05-26 22:36:20 UTC

        dl-op-linalg-softmax-exp-sum-div-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        BM_POOLING_NHWC_SUM_SCALAR0.20.22972
        diff --git a/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html b/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html old mode 100644 new mode 100755 index 75862325..13995416 --- a/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html +++ b/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-op-matmul-transpose-b-benchmark.json

        2025-05-26 22:24:35 UTC

        +

        deeplearning/dl-op-matmul-transpose-b-benchmark.json

        2025-05-26 22:36:20 UTC

        dl-op-matmul-transpose-b-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        BM_SOFTMAXEXPSUMDIV_SCALAR0.00.0125501
        diff --git a/site/deeplearning/dl-op-tosa-transpose-benchmark.html b/site/deeplearning/dl-op-tosa-transpose-benchmark.html old mode 100644 new mode 100755 index 764741de..05a7394b --- a/site/deeplearning/dl-op-tosa-transpose-benchmark.html +++ b/site/deeplearning/dl-op-tosa-transpose-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-op-tosa-transpose-benchmark.json

        2025-05-26 22:24:35 UTC

        +

        deeplearning/dl-op-tosa-transpose-benchmark.json

        2025-05-26 22:36:20 UTC

        dl-op-tosa-transpose-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        DL_OPS_MATMUL_TRANSPOSE_B/scalar_O0/iterations:51265.31263.65
        diff --git a/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html old mode 100644 new mode 100755 index e0bf812c..be86d3c4 --- a/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:24:35 UTC

        +

        imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:36:20 UTC

        AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        NameTime (ns)CPU (ns)Iterations
        DL_OPS_TRANSPOSE_2D/scalar_O0/iterations:529.720.95
        diff --git a/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html new file mode 100644 index 00000000..ebcfa10c --- /dev/null +++ b/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -0,0 +1,36 @@ + + + +

        imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:36:20 UTC

        +

        AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        +
        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/14.94.9144
        + + + + + + + + + + + + + + + + + + + + + +
        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/111.811.859
        MLIR_Conv2D/129.929.924
        Buddy_Conv2D/11.01.0751
        Buddy_Corr2D_Constant_Padding/11.81.8391
        OpenCV_Filter2D_Constant_Padding/12.72.7255
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.10.14778
        Buddy_Resize2D_Bilinear_Interpolation/10.30.32630
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.00.0103033
        OpenCV_Resize2D_Bilinear_Interpolation/10.00.048860
        Buddy_Erosion2D_Constant_Padding/10.20.23134
        Buddy_Dilation2D_Constant_Padding/10.20.22971
        Buddy_Opening2D_Constant_Padding/10.30.31881
        Buddy_Closing2D_Constant_Padding/10.30.32009
        Buddy_TopHat2D_Constant_Padding/10.90.9714
        Buddy_BottomHat2D_Constant_Padding/10.90.9723
        OpenCV_Erode2D_Constant_Padding/10.10.14965
        OpenCV_Opening2D_Constant_Padding/10.20.23088
        OpenCV_Closing2D_Constant_Padding/10.20.23129
        OpenCV_TopHat2D_Constant_Padding/10.30.32625
        OpenCV_BottomHat2D_Constant_Padding/10.30.32625
        OpenCV_MorphGrad2D_Constant_Padding/10.30.32692
        OpenCV_Dilate2D_Constant_Padding/10.10.15053
        \ No newline at end of file diff --git a/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html new file mode 100644 index 00000000..f3c6f4ab --- /dev/null +++ b/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -0,0 +1,36 @@ + + + +

        imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-05-26 22:36:20 UTC

        +

        AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        + + + + + + + + + + + + + + + + + + + + + + +
        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/111.911.960
        MLIR_Conv2D/129.929.924
        Buddy_Conv2D/11.01.0738
        Buddy_Corr2D_Constant_Padding/11.81.8393
        OpenCV_Filter2D_Constant_Padding/12.82.8255
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.10.14756
        Buddy_Resize2D_Bilinear_Interpolation/10.30.32630
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.00.0103027
        OpenCV_Resize2D_Bilinear_Interpolation/10.00.048458
        Buddy_Erosion2D_Constant_Padding/10.20.23152
        Buddy_Dilation2D_Constant_Padding/10.20.23142
        Buddy_Opening2D_Constant_Padding/10.30.32069
        Buddy_Closing2D_Constant_Padding/10.30.32004
        Buddy_TopHat2D_Constant_Padding/10.90.9753
        Buddy_BottomHat2D_Constant_Padding/10.90.9762
        OpenCV_Erode2D_Constant_Padding/10.10.14961
        OpenCV_Opening2D_Constant_Padding/10.20.23152
        OpenCV_Closing2D_Constant_Padding/10.20.22470
        OpenCV_TopHat2D_Constant_Padding/10.30.32673
        OpenCV_BottomHat2D_Constant_Padding/10.30.32749
        OpenCV_MorphGrad2D_Constant_Padding/10.30.32717
        OpenCV_Dilate2D_Constant_Padding/10.10.15082
        \ No newline at end of file diff --git a/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html old mode 100644 new mode 100755 index 3721e999..12877fc3 --- a/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:24:35 UTC

        +

        imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:36:20 UTC

        AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        diff --git a/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html old mode 100644 new mode 100755 index b3e875ee..3afea3cb --- a/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-05-26 22:24:35 UTC

        +

        imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-05-26 22:36:20 UTC

        AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/14.94.9144
        diff --git a/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html old mode 100644 new mode 100755 index 30c37fee..c5efcd21 --- a/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:24:35 UTC

        +

        imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:36:20 UTC

        AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/15.05.0140
        diff --git a/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html old mode 100644 new mode 100755 index c4852253..e2871383 --- a/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-05-26 22:24:35 UTC

        +

        imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-05-26 22:36:20 UTC

        AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/15.05.0139
        diff --git a/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html old mode 100644 new mode 100755 index 9f6f4448..c2199d0b --- a/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:24:35 UTC

        +

        imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:36:20 UTC

        AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/15.05.0140
        diff --git a/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html old mode 100644 new mode 100755 index 4fb507cb..e0f9fe9f --- a/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-05-26 22:24:35 UTC

        +

        imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-05-26 22:36:20 UTC

        AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/111.811.860
        diff --git a/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html old mode 100644 new mode 100755 index 730137c2..10986ccb --- a/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:24:35 UTC

        +

        imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:36:20 UTC

        AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/111.811.859
        diff --git a/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html old mode 100644 new mode 100755 index 5ff09924..707e7f86 --- a/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-05-26 22:24:35 UTC

        +

        imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-05-26 22:36:20 UTC

        AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/122.222.231
        diff --git a/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html old mode 100644 new mode 100755 index 4d6ec707..26bd0a1f --- a/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:24:35 UTC

        +

        imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:36:20 UTC

        AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/122.022.032
        diff --git a/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html old mode 100644 new mode 100755 index fb5643ed..459fc4a3 --- a/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-05-26 22:24:35 UTC

        +

        imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-05-26 22:36:20 UTC

        AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/135.235.220
        diff --git a/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html old mode 100644 new mode 100755 index 524fe758..56dc37a8 --- a/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:24:35 UTC

        +

        imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:36:20 UTC

        SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/134.834.820
        diff --git a/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html old mode 100644 new mode 100755 index f7e1cb5e..8a59b557 --- a/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-05-26 22:24:35 UTC

        +

        imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-05-26 22:36:20 UTC

        SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/15.05.0139
        diff --git a/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html old mode 100644 new mode 100755 index 9dbe1bab..98d95517 --- a/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:24:35 UTC

        +

        imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:36:20 UTC

        SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/15.05.0140
        diff --git a/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html old mode 100644 new mode 100755 index be961174..ad8ba0e6 --- a/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-05-26 22:24:35 UTC

        +

        imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-05-26 22:36:20 UTC

        SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/111.911.960
        diff --git a/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html old mode 100644 new mode 100755 index 4660feda..6786f881 --- a/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:24:35 UTC

        +

        imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:36:20 UTC

        SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/111.611.660
        diff --git a/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html old mode 100644 new mode 100755 index e151144d..dd69c474 --- a/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-05-26 22:24:35 UTC

        +

        imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-05-26 22:36:20 UTC

        SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/15.05.0140
        diff --git a/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html old mode 100644 new mode 100755 index b288e88f..19d021e2 --- a/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:24:35 UTC

        +

        imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:36:20 UTC

        SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/14.94.9144
        diff --git a/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html old mode 100644 new mode 100755 index 90c4b26b..3522b04b --- a/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-05-26 22:24:35 UTC

        +

        imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-05-26 22:36:20 UTC

        SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/14.94.9143
        diff --git a/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html old mode 100644 new mode 100755 index f9868cf6..6db815a3 --- a/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:24:35 UTC

        +

        imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:36:20 UTC

        SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/14.94.9143
        diff --git a/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html old mode 100644 new mode 100755 index bddd4186..9195a287 --- a/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-05-26 22:24:35 UTC

        +

        imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-05-26 22:36:20 UTC

        SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/111.811.859
        diff --git a/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html old mode 100644 new mode 100755 index 6bd0958e..e40bda66 --- a/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:24:35 UTC

        +

        imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:36:20 UTC

        SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/111.811.859
        diff --git a/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html old mode 100644 new mode 100755 index ffb15e51..5a325445 --- a/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-05-26 22:24:35 UTC

        +

        imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-05-26 22:36:20 UTC

        SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/122.022.032
        diff --git a/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html old mode 100644 new mode 100755 index 16d450da..14650c07 --- a/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:24:35 UTC

        +

        imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:36:20 UTC

        SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/122.122.132
        diff --git a/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html old mode 100644 new mode 100755 index 1839d422..3a8f5d95 --- a/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-05-26 22:24:35 UTC

        +

        imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-05-26 22:36:20 UTC

        SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/135.235.220
        diff --git a/site/index.html b/site/index.html new file mode 100644 index 00000000..e39390f8 --- /dev/null +++ b/site/index.html @@ -0,0 +1,66 @@ + + +

        Buddy-Benchmark results

        \ No newline at end of file diff --git a/site/vectorization/vectorization_matrix.html b/site/vectorization/vectorization_matrix.html old mode 100644 new mode 100755 index 2d1fc70a..49b6b158 --- a/site/vectorization/vectorization_matrix.html +++ b/site/vectorization/vectorization_matrix.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        vectorization/vectorization_matrix.json

        2025-05-26 22:24:35 UTC

        +

        vectorization/vectorization_matrix.json

        2025-05-26 22:36:20 UTC

        vectorization_matrix.json

        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/135.335.320
        diff --git a/test/test_result/vectorization/vectorization_result.log b/test/test_result/vectorization/vectorization_result.log old mode 100644 new mode 100755 diff --git a/test_result/deeplearning/build_results_crosscompile_summary.log b/test_result/deeplearning/build_results_crosscompile_summary.log old mode 100644 new mode 100755 diff --git a/test_result/deeplearning/build_results_summary.log b/test_result/deeplearning/build_results_summary.log old mode 100644 new mode 100755 diff --git a/test_result/deeplearning/dl-layer-ffn-benchmark.json b/test_result/deeplearning/dl-layer-ffn-benchmark.json old mode 100644 new mode 100755 diff --git a/test_result/deeplearning/dl-layer-ffn-benchmark.log b/test_result/deeplearning/dl-layer-ffn-benchmark.log old mode 100644 new mode 100755 diff --git a/test_result/deeplearning/dl-layer-rmsnorm-benchmark.json b/test_result/deeplearning/dl-layer-rmsnorm-benchmark.json old mode 100644 new mode 100755 diff --git a/test_result/deeplearning/dl-layer-rmsnorm-benchmark.log b/test_result/deeplearning/dl-layer-rmsnorm-benchmark.log old mode 100644 new mode 100755 diff --git a/test_result/deeplearning/dl-layer-selfattention-benchmark.json b/test_result/deeplearning/dl-layer-selfattention-benchmark.json old mode 100644 new mode 100755 diff --git a/test_result/deeplearning/dl-layer-selfattention-benchmark.log b/test_result/deeplearning/dl-layer-selfattention-benchmark.log old mode 100644 new mode 100755 diff --git a/test_result/deeplearning/dl-model-lenet-benchmark.json b/test_result/deeplearning/dl-model-lenet-benchmark.json old mode 100644 new mode 100755 diff --git a/test_result/deeplearning/dl-model-lenet-benchmark.log b/test_result/deeplearning/dl-model-lenet-benchmark.log old mode 100644 new mode 100755 diff --git a/test_result/deeplearning/dl-model-mobilenetv3-benchmark.json b/test_result/deeplearning/dl-model-mobilenetv3-benchmark.json old mode 100644 new mode 100755 diff --git 
a/test_result/deeplearning/dl-model-mobilenetv3-benchmark.log b/test_result/deeplearning/dl-model-mobilenetv3-benchmark.log old mode 100644 new mode 100755 diff --git a/test_result/deeplearning/dl-model-resnet18-benchmark.json b/test_result/deeplearning/dl-model-resnet18-benchmark.json old mode 100644 new mode 100755 diff --git a/test_result/deeplearning/dl-model-resnet18-benchmark.log b/test_result/deeplearning/dl-model-resnet18-benchmark.log old mode 100644 new mode 100755 diff --git a/test_result/deeplearning/dl-model-tinyllama-benchmark.json b/test_result/deeplearning/dl-model-tinyllama-benchmark.json old mode 100644 new mode 100755 diff --git a/test_result/deeplearning/dl-model-tinyllama-benchmark.log b/test_result/deeplearning/dl-model-tinyllama-benchmark.log old mode 100644 new mode 100755 diff --git a/test_result/deeplearning/dl-model-whisper-benchmark.json b/test_result/deeplearning/dl-model-whisper-benchmark.json old mode 100644 new mode 100755 diff --git a/test_result/deeplearning/dl-model-whisper-benchmark.log b/test_result/deeplearning/dl-model-whisper-benchmark.log old mode 100644 new mode 100755 diff --git a/test_result/deeplearning/dl-op-linalg-arithaddf-benchmark.json b/test_result/deeplearning/dl-op-linalg-arithaddf-benchmark.json old mode 100644 new mode 100755 diff --git a/test_result/deeplearning/dl-op-linalg-arithaddf-benchmark.log b/test_result/deeplearning/dl-op-linalg-arithaddf-benchmark.log old mode 100644 new mode 100755 diff --git a/test_result/deeplearning/dl-op-linalg-arithdivf-benchmark.json b/test_result/deeplearning/dl-op-linalg-arithdivf-benchmark.json old mode 100644 new mode 100755 diff --git a/test_result/deeplearning/dl-op-linalg-arithdivf-benchmark.log b/test_result/deeplearning/dl-op-linalg-arithdivf-benchmark.log old mode 100644 new mode 100755 diff --git a/test_result/deeplearning/dl-op-linalg-arithmulf-benchmark.json b/test_result/deeplearning/dl-op-linalg-arithmulf-benchmark.json old mode 100644 new mode 100755 diff --git 
a/test_result/deeplearning/dl-op-linalg-arithmulf-benchmark.log b/test_result/deeplearning/dl-op-linalg-arithmulf-benchmark.log old mode 100644 new mode 100755 diff --git a/test_result/deeplearning/dl-op-linalg-arithnegf-benchmark.json b/test_result/deeplearning/dl-op-linalg-arithnegf-benchmark.json old mode 100644 new mode 100755 diff --git a/test_result/deeplearning/dl-op-linalg-arithnegf-benchmark.log b/test_result/deeplearning/dl-op-linalg-arithnegf-benchmark.log old mode 100644 new mode 100755 diff --git a/test_result/deeplearning/dl-op-linalg-arithsubf-benchmark.json b/test_result/deeplearning/dl-op-linalg-arithsubf-benchmark.json old mode 100644 new mode 100755 diff --git a/test_result/deeplearning/dl-op-linalg-arithsubf-benchmark.log b/test_result/deeplearning/dl-op-linalg-arithsubf-benchmark.log old mode 100644 new mode 100755 diff --git a/test_result/deeplearning/dl-op-linalg-batch-matmul-benchmark.json b/test_result/deeplearning/dl-op-linalg-batch-matmul-benchmark.json old mode 100644 new mode 100755 diff --git a/test_result/deeplearning/dl-op-linalg-batch-matmul-benchmark.log b/test_result/deeplearning/dl-op-linalg-batch-matmul-benchmark.log old mode 100644 new mode 100755 diff --git a/test_result/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.json b/test_result/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.json old mode 100644 new mode 100755 diff --git a/test_result/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.log b/test_result/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.log old mode 100644 new mode 100755 diff --git a/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json b/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json old mode 100644 new mode 100755 diff --git a/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.log b/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.log old mode 100644 new mode 100755 diff --git 
a/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json b/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json old mode 100644 new mode 100755 diff --git a/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.log b/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.log old mode 100644 new mode 100755 diff --git a/test_result/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json b/test_result/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json old mode 100644 new mode 100755 diff --git a/test_result/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.log b/test_result/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.log old mode 100644 new mode 100755 diff --git a/test_result/deeplearning/dl-op-linalg-mathexp-benchmark.json b/test_result/deeplearning/dl-op-linalg-mathexp-benchmark.json old mode 100644 new mode 100755 diff --git a/test_result/deeplearning/dl-op-linalg-mathexp-benchmark.log b/test_result/deeplearning/dl-op-linalg-mathexp-benchmark.log old mode 100644 new mode 100755 diff --git a/test_result/deeplearning/dl-op-linalg-mathfpow-benchmark.json b/test_result/deeplearning/dl-op-linalg-mathfpow-benchmark.json old mode 100644 new mode 100755 diff --git a/test_result/deeplearning/dl-op-linalg-mathfpow-benchmark.log b/test_result/deeplearning/dl-op-linalg-mathfpow-benchmark.log old mode 100644 new mode 100755 diff --git a/test_result/deeplearning/dl-op-linalg-mathrsqrt-benchmark.json b/test_result/deeplearning/dl-op-linalg-mathrsqrt-benchmark.json old mode 100644 new mode 100755 diff --git a/test_result/deeplearning/dl-op-linalg-mathrsqrt-benchmark.log b/test_result/deeplearning/dl-op-linalg-mathrsqrt-benchmark.log old mode 100644 new mode 100755 diff --git a/test_result/deeplearning/dl-op-linalg-matmul-benchmark.json b/test_result/deeplearning/dl-op-linalg-matmul-benchmark.json old mode 100644 new mode 100755 diff --git 
a/test_result/deeplearning/dl-op-linalg-matmul-benchmark.log b/test_result/deeplearning/dl-op-linalg-matmul-benchmark.log old mode 100644 new mode 100755 diff --git a/test_result/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.json b/test_result/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.json old mode 100644 new mode 100755 diff --git a/test_result/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.log b/test_result/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.log old mode 100644 new mode 100755 diff --git a/test_result/deeplearning/dl-op-linalg-reduceaddf-benchmark.log b/test_result/deeplearning/dl-op-linalg-reduceaddf-benchmark.log old mode 100644 new mode 100755 diff --git a/test_result/deeplearning/dl-op-linalg-reducemaxf-benchmark.log b/test_result/deeplearning/dl-op-linalg-reducemaxf-benchmark.log old mode 100644 new mode 100755 diff --git a/test_result/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.json b/test_result/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.json old mode 100644 new mode 100755 diff --git a/test_result/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.log b/test_result/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.log old mode 100644 new mode 100755 diff --git a/test_result/deeplearning/dl-op-matmul-transpose-b-benchmark.json b/test_result/deeplearning/dl-op-matmul-transpose-b-benchmark.json old mode 100644 new mode 100755 diff --git a/test_result/deeplearning/dl-op-matmul-transpose-b-benchmark.log b/test_result/deeplearning/dl-op-matmul-transpose-b-benchmark.log old mode 100644 new mode 100755 diff --git a/test_result/deeplearning/dl-op-tosa-transpose-benchmark.json b/test_result/deeplearning/dl-op-tosa-transpose-benchmark.json old mode 100644 new mode 100755 diff --git a/test_result/deeplearning/dl-op-tosa-transpose-benchmark.log b/test_result/deeplearning/dl-op-tosa-transpose-benchmark.log old mode 100644 new mode 100755 diff --git 
a/test_result/deeplearning/run_results_summary.log b/test_result/deeplearning/run_results_summary.log old mode 100644 new mode 100755 diff --git a/test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json old mode 100644 new mode 100755 diff --git a/test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json deleted file mode 100644 index 99961cec..00000000 --- a/test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json +++ /dev/null @@ -1,80 +0,0 @@ -{ - "context": { - "date": "2025-05-26T22:23:41+00:00", - "host_name": "4ed4bacfe45d", - "executable": "./bin/image-processing-benchmark", - "num_cpus": 24, - "mhz_per_cpu": 5100, - "cpu_scaling_enabled": true, - "caches": [ - { - "type": "Data", - "level": 1, - "size": 49152, - "num_sharing": 2 - }, - { - "type": "Instruction", - "level": 1, - "size": 32768, - "num_sharing": 2 - }, - { - "type": "Unified", - "level": 2, - "size": 1310720, - "num_sharing": 2 - }, - { - "type": "Unified", - "level": 3, - "size": 31457280, - "num_sharing": 24 - } - ], - "load_avg": [2.00049,4.70947,7.18896], - "library_build_type": "release" - }, - "benchmarks": [ - { - "name": "Eigen_Convolve2D/1", - "family_index": 0, - "per_family_instance_index": 0, - "run_name": "Eigen_Convolve2D/1", - "run_type": "iteration", - "repetitions": 1, - "repetition_index": 0, - "threads": 1, - "iterations": 143, - "real_time": 4.8717384884407471e+00, - "cpu_time": 4.8714335384615381e+00, - "time_unit": "ms" - }, - { - "name": "MLIR_Conv2D/1", - "family_index": 1, - "per_family_instance_index": 0, - "run_name": "MLIR_Conv2D/1", - "run_type": "iteration", - "repetitions": 1, - "repetition_index": 0, - 
"threads": 1, - "iterations": 95, - "real_time": 7.3674740956017848e+00, - "cpu_time": 7.3670748842105276e+00, - "time_unit": "ms" - }, - { - "name": "Buddy_Conv2D/1", - "family_index": 2, - "per_family_instance_index": 0, - "run_name": "Buddy_Conv2D/1", - "run_type": "iteration", - "repetitions": 1, - "repetition_index": 0, - "threads": 1, - "iterations": 1657, - "real_time": 4.2453381037884758e-01, - "cpu_time": 4.2450378636089325e-01, - "time_unit": "ms" - } \ No newline at end of file diff --git a/test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json old mode 100644 new mode 100755 diff --git a/test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json old mode 100644 new mode 100755 diff --git a/test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json old mode 100644 new mode 100755 diff --git a/test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json old mode 100644 new mode 100755 diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json old mode 100644 new mode 100755 diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json old mode 100644 new mode 
100755 diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json old mode 100644 new mode 100755 diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json old mode 100644 new mode 100755 diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json old mode 100644 new mode 100755 diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json old mode 100644 new mode 100755 diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json old mode 100644 new mode 100755 diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json old mode 100644 new mode 100755 diff --git a/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json old mode 100644 new mode 100755 diff --git a/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json 
b/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json old mode 100644 new mode 100755 diff --git a/test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json old mode 100644 new mode 100755 diff --git a/test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json old mode 100644 new mode 100755 diff --git a/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json old mode 100644 new mode 100755 diff --git a/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json old mode 100644 new mode 100755 diff --git a/test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json old mode 100644 new mode 100755 diff --git a/test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json old mode 100644 new mode 100755 diff --git a/test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json old mode 100644 new mode 100755 diff --git 
a/test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json old mode 100644 new mode 100755 diff --git a/test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json old mode 100644 new mode 100755 diff --git a/test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json old mode 100644 new mode 100755 diff --git a/test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json old mode 100644 new mode 100755 diff --git a/test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json old mode 100644 new mode 100755 diff --git a/test_result/imageprocessing/image-processing-result.log b/test_result/imageprocessing/image-processing-result.log old mode 100644 new mode 100755 diff --git a/test_result/vectorization/vectorization_matrix.json b/test_result/vectorization/vectorization_matrix.json old mode 100644 new mode 100755 From 41780d8fa86450a2b1ab54643994b6a7d9dc2085 Mon Sep 17 00:00:00 2001 From: Q Liu <52538137+LIUQyou@users.noreply.github.com> Date: Sun, 1 Jun 2025 11:10:47 +0200 Subject: [PATCH 22/52] update the script --- scripts/logs2html.py | 20 +++++++++++++------ site/deeplearning/dl-layer-ffn-benchmark.html | 2 +- .../dl-layer-rmsnorm-benchmark.html | 2 +- .../dl-layer-selfattention-benchmark.html | 2 +- 
.../dl-model-lenet-benchmark.html | 2 +- .../dl-model-mobilenetv3-benchmark.html | 2 +- .../dl-model-resnet18-benchmark.html | 2 +- .../dl-model-tinyllama-benchmark.html | 2 +- .../dl-model-whisper-benchmark.html | 2 +- .../dl-op-linalg-arithaddf-benchmark.html | 2 +- .../dl-op-linalg-arithdivf-benchmark.html | 2 +- .../dl-op-linalg-arithmulf-benchmark.html | 2 +- .../dl-op-linalg-arithnegf-benchmark.html | 2 +- .../dl-op-linalg-arithsubf-benchmark.html | 2 +- .../dl-op-linalg-batch-matmul-benchmark.html | 2 +- ...-op-linalg-conv2d-nchw-fchw-benchmark.html | 2 +- ...-op-linalg-conv2d-nhwc-fhwc-benchmark.html | 2 +- ...-op-linalg-conv2d-nhwc-hwcf-benchmark.html | 2 +- ...-depthwise-conv-2d-nhwc-hwc-benchmark.html | 2 +- .../dl-op-linalg-mathexp-benchmark.html | 2 +- .../dl-op-linalg-mathfpow-benchmark.html | 2 +- .../dl-op-linalg-mathrsqrt-benchmark.html | 2 +- .../dl-op-linalg-matmul-benchmark.html | 2 +- ...-op-linalg-pooling-nhwc-sum-benchmark.html | 2 +- ...-linalg-softmax-exp-sum-div-benchmark.html | 2 +- .../dl-op-matmul-transpose-b-benchmark.html | 2 +- .../dl-op-tosa-transpose-benchmark.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- 
...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- site/vectorization/vectorization_matrix.html | 2 +- .../vectorization/vectorization_result.log | 6 ------ test/test_script_imageprocessing.sh | 4 +++- test/test_script_vectorizationprocessing.sh | 10 +++++++--- 58 files changed, 78 insertions(+), 70 deletions(-) mode change 100755 => 100644 site/deeplearning/dl-layer-ffn-benchmark.html mode change 100755 => 100644 site/deeplearning/dl-layer-rmsnorm-benchmark.html mode change 100755 => 100644 site/deeplearning/dl-layer-selfattention-benchmark.html mode change 100755 => 100644 site/deeplearning/dl-model-lenet-benchmark.html mode change 100755 => 100644 site/deeplearning/dl-model-mobilenetv3-benchmark.html mode change 100755 => 100644 site/deeplearning/dl-model-resnet18-benchmark.html mode change 100755 => 100644 site/deeplearning/dl-model-tinyllama-benchmark.html mode change 100755 => 100644 site/deeplearning/dl-model-whisper-benchmark.html mode change 100755 => 100644 site/deeplearning/dl-op-linalg-arithaddf-benchmark.html mode change 100755 => 100644 site/deeplearning/dl-op-linalg-arithdivf-benchmark.html mode change 100755 => 100644 site/deeplearning/dl-op-linalg-arithmulf-benchmark.html mode change 100755 => 100644 site/deeplearning/dl-op-linalg-arithnegf-benchmark.html mode change 100755 => 100644 site/deeplearning/dl-op-linalg-arithsubf-benchmark.html mode change 100755 => 100644 site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html 
mode change 100755 => 100644 site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html mode change 100755 => 100644 site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html mode change 100755 => 100644 site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html mode change 100755 => 100644 site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html mode change 100755 => 100644 site/deeplearning/dl-op-linalg-mathexp-benchmark.html mode change 100755 => 100644 site/deeplearning/dl-op-linalg-mathfpow-benchmark.html mode change 100755 => 100644 site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html mode change 100755 => 100644 site/deeplearning/dl-op-linalg-matmul-benchmark.html mode change 100755 => 100644 site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html mode change 100755 => 100644 site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html mode change 100755 => 100644 site/deeplearning/dl-op-matmul-transpose-b-benchmark.html mode change 100755 => 100644 site/deeplearning/dl-op-tosa-transpose-benchmark.html mode change 100755 => 100644 site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html mode change 100755 => 100644 site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html mode change 100755 => 100644 site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html mode change 100755 => 100644 site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html mode change 100755 => 100644 site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html mode change 100755 => 100644 site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html mode change 100755 => 100644 site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html mode change 100755 => 100644 
site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html mode change 100755 => 100644 site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html mode change 100755 => 100644 site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html mode change 100755 => 100644 site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html mode change 100755 => 100644 site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html mode change 100755 => 100644 site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html mode change 100755 => 100644 site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html mode change 100755 => 100644 site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html mode change 100755 => 100644 site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html mode change 100755 => 100644 site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html mode change 100755 => 100644 site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html mode change 100755 => 100644 site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html mode change 100755 => 100644 site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html mode change 100755 => 100644 site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html mode change 100755 => 100644 site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html mode change 100755 => 100644 site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html mode change 100755 => 
100644 site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html mode change 100755 => 100644 site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html mode change 100755 => 100644 site/vectorization/vectorization_matrix.html delete mode 100755 test/test_result/vectorization/vectorization_result.log diff --git a/scripts/logs2html.py b/scripts/logs2html.py index 2ea37b24..ee44e977 100755 --- a/scripts/logs2html.py +++ b/scripts/logs2html.py @@ -24,15 +24,23 @@ def gbench_json_to_table(js_path: pathlib.Path) -> str: data = json.loads(js_path.read_text())["benchmarks"] - head = ("" - "") + + # pick the first “real” iteration row to read the time_unit + first = next(b for b in data if b.get("run_type") == "iteration") + unit = html.escape(first.get("time_unit", "ns")) + + head = (f"" + f"") + rows = "\n".join( f"" - f"" - f"" - f"" - for b in data if "name" in b + f"" + f"" + f"" + for b in data + if b.get("run_type") == "iteration" # ignore _mean, _stddev ) + return f"

        {js_path.name}

        \n
        NameTime (ns)CPU (ns)Iterations
        MLIR_MatMul/119.519.535685429
        NameTime (ns)CPU (ns)Iterations
        NameTime ({unit})CPU ({unit})Iterations
        {html.escape(b['name'])}{b['real_time']:.1f}{b['cpu_time']:.1f}{b['iterations']}
        {b['real_time']:.3g}{b['cpu_time']:.3g}{b['iterations']:,}
        {head}\n{rows}
        " # --------------------------------------------------------------------------- diff --git a/site/deeplearning/dl-layer-ffn-benchmark.html b/site/deeplearning/dl-layer-ffn-benchmark.html old mode 100755 new mode 100644 index 867d8c52..554a7ada --- a/site/deeplearning/dl-layer-ffn-benchmark.html +++ b/site/deeplearning/dl-layer-ffn-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-layer-ffn-benchmark.json

        2025-05-26 22:36:20 UTC

        +

        deeplearning/dl-layer-ffn-benchmark.json

        2025-05-26 22:41:02 UTC

        dl-layer-ffn-benchmark.json

        diff --git a/site/deeplearning/dl-layer-rmsnorm-benchmark.html b/site/deeplearning/dl-layer-rmsnorm-benchmark.html old mode 100755 new mode 100644 index b2345de8..dfdce6c5 --- a/site/deeplearning/dl-layer-rmsnorm-benchmark.html +++ b/site/deeplearning/dl-layer-rmsnorm-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-layer-rmsnorm-benchmark.json

        2025-05-26 22:36:20 UTC

        +

        deeplearning/dl-layer-rmsnorm-benchmark.json

        2025-05-26 22:41:02 UTC

        dl-layer-rmsnorm-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        DL_LAYER_FFN/Scalar0.10.110758
        diff --git a/site/deeplearning/dl-layer-selfattention-benchmark.html b/site/deeplearning/dl-layer-selfattention-benchmark.html old mode 100755 new mode 100644 index 5db4dcdd..23bcb770 --- a/site/deeplearning/dl-layer-selfattention-benchmark.html +++ b/site/deeplearning/dl-layer-selfattention-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-layer-selfattention-benchmark.json

        2025-05-26 22:36:20 UTC

        +

        deeplearning/dl-layer-selfattention-benchmark.json

        2025-05-26 22:41:02 UTC

        dl-layer-selfattention-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        DL_LAYER_RMSNORM/Scalar0.00.0358748
        diff --git a/site/deeplearning/dl-model-lenet-benchmark.html b/site/deeplearning/dl-model-lenet-benchmark.html old mode 100755 new mode 100644 index c592c662..48de8cee --- a/site/deeplearning/dl-model-lenet-benchmark.html +++ b/site/deeplearning/dl-model-lenet-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-model-lenet-benchmark.json

        2025-05-26 22:36:20 UTC

        +

        deeplearning/dl-model-lenet-benchmark.json

        2025-05-26 22:41:02 UTC

        dl-model-lenet-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        DL_LAYER_ATTENTION/Scalar4.74.7148
        diff --git a/site/deeplearning/dl-model-mobilenetv3-benchmark.html b/site/deeplearning/dl-model-mobilenetv3-benchmark.html old mode 100755 new mode 100644 index bb473f6d..42aab51e --- a/site/deeplearning/dl-model-mobilenetv3-benchmark.html +++ b/site/deeplearning/dl-model-mobilenetv3-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-model-mobilenetv3-benchmark.json

        2025-05-26 22:36:20 UTC

        +

        deeplearning/dl-model-mobilenetv3-benchmark.json

        2025-05-26 22:41:02 UTC

        dl-model-mobilenetv3-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        DL_MODEL_LENET/Auto_Vectorization0.20.24396
        diff --git a/site/deeplearning/dl-model-resnet18-benchmark.html b/site/deeplearning/dl-model-resnet18-benchmark.html old mode 100755 new mode 100644 index 33b3cd6a..f772d281 --- a/site/deeplearning/dl-model-resnet18-benchmark.html +++ b/site/deeplearning/dl-model-resnet18-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-model-resnet18-benchmark.json

        2025-05-26 22:36:20 UTC

        +

        deeplearning/dl-model-resnet18-benchmark.json

        2025-05-26 22:41:02 UTC

        dl-model-resnet18-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        BM_MobileNet_V3/BM_MobileNet_V3_scalar35.335.320
        diff --git a/site/deeplearning/dl-model-tinyllama-benchmark.html b/site/deeplearning/dl-model-tinyllama-benchmark.html old mode 100755 new mode 100644 index d0a5bb1a..2f7b58d9 --- a/site/deeplearning/dl-model-tinyllama-benchmark.html +++ b/site/deeplearning/dl-model-tinyllama-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-model-tinyllama-benchmark.json

        2025-05-26 22:36:20 UTC

        +

        deeplearning/dl-model-tinyllama-benchmark.json

        2025-05-26 22:41:02 UTC

        dl-model-tinyllama-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        DL_MODEL_Resnet18/Auto_Vectorization725.8717.91
        diff --git a/site/deeplearning/dl-model-whisper-benchmark.html b/site/deeplearning/dl-model-whisper-benchmark.html old mode 100755 new mode 100644 index 744b14f0..bf0249ff --- a/site/deeplearning/dl-model-whisper-benchmark.html +++ b/site/deeplearning/dl-model-whisper-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-model-whisper-benchmark.json

        2025-05-26 22:36:20 UTC

        +

        deeplearning/dl-model-whisper-benchmark.json

        2025-05-26 22:41:02 UTC

        dl-model-whisper-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        DL_MODEL_TINYLLAMA/scalar145312.6145306.01
        diff --git a/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html b/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html old mode 100755 new mode 100644 index 7a497e07..e2bc4e0c --- a/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-op-linalg-arithaddf-benchmark.json

        2025-05-26 22:36:20 UTC

        +

        deeplearning/dl-op-linalg-arithaddf-benchmark.json

        2025-05-26 22:41:02 UTC

        dl-op-linalg-arithaddf-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        DL_MODEL_Whisper/Auto_Vectorization80864.480855.31
        diff --git a/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html b/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html old mode 100755 new mode 100644 index fb81cf4a..cbbc3b68 --- a/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-op-linalg-arithdivf-benchmark.json

        2025-05-26 22:36:20 UTC

        +

        deeplearning/dl-op-linalg-arithdivf-benchmark.json

        2025-05-26 22:41:02 UTC

        dl-op-linalg-arithdivf-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        BM_ADDF_SCALAR0.00.023951
        diff --git a/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html b/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html old mode 100755 new mode 100644 index f9f8f7a4..448c1217 --- a/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-op-linalg-arithmulf-benchmark.json

        2025-05-26 22:36:20 UTC

        +

        deeplearning/dl-op-linalg-arithmulf-benchmark.json

        2025-05-26 22:41:02 UTC

        dl-op-linalg-arithmulf-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        BM_DIVF_SCALAR0.00.022508
        diff --git a/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html b/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html old mode 100755 new mode 100644 index 8837c4cc..e778faac --- a/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-op-linalg-arithnegf-benchmark.json

        2025-05-26 22:36:20 UTC

        +

        deeplearning/dl-op-linalg-arithnegf-benchmark.json

        2025-05-26 22:41:02 UTC

        dl-op-linalg-arithnegf-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        BM_MULF_SCALAR0.00.023392
        diff --git a/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html b/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html old mode 100755 new mode 100644 index 402d3b42..77a5298c --- a/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-op-linalg-arithsubf-benchmark.json

        2025-05-26 22:36:20 UTC

        +

        deeplearning/dl-op-linalg-arithsubf-benchmark.json

        2025-05-26 22:41:02 UTC

        dl-op-linalg-arithsubf-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        BM_NEGF_SCALAR0.00.030765
        diff --git a/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html b/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html old mode 100755 new mode 100644 index 70248e97..16e23a1f --- a/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html +++ b/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-op-linalg-batch-matmul-benchmark.json

        2025-05-26 22:36:20 UTC

        +

        deeplearning/dl-op-linalg-batch-matmul-benchmark.json

        2025-05-26 22:41:02 UTC

        dl-op-linalg-batch-matmul-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        BM_SUBF_SCALAR0.00.023979
        diff --git a/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html b/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html old mode 100755 new mode 100644 index ade6f089..c17394a8 --- a/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html +++ b/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.json

        2025-05-26 22:36:20 UTC

        +

        deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.json

        2025-05-26 22:41:02 UTC

        dl-op-linalg-conv2d-nchw-fchw-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        DL_OPS_BATCH_MATMUL/Scalar/iterations:13551.43551.01
        diff --git a/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html b/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html old mode 100755 new mode 100644 index 36d1f3d8..bc9c8cc3 --- a/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html +++ b/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json

        2025-05-26 22:36:20 UTC

        +

        deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json

        2025-05-26 22:41:02 UTC

        dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        BM_Conv2DNchwFchw_SCALAR282.0282.02
        diff --git a/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html b/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html old mode 100755 new mode 100644 index ce5e44dd..756eb29a --- a/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json

        2025-05-26 22:36:20 UTC

        +

        deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json

        2025-05-26 22:41:02 UTC

        dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        DL_OPS_CONV_2D_NHWC_FHWC/scalar/iterations:573.773.65
        diff --git a/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html b/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html old mode 100755 new mode 100644 index 451d8869..7f08f18d --- a/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html +++ b/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json

        2025-05-26 22:36:20 UTC

        +

        deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json

        2025-05-26 22:41:02 UTC

        dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        BM_CONV_2D_NHWC_HWCF_SCALAR32.632.622
        diff --git a/site/deeplearning/dl-op-linalg-mathexp-benchmark.html b/site/deeplearning/dl-op-linalg-mathexp-benchmark.html old mode 100755 new mode 100644 index f61f291c..341c32bd --- a/site/deeplearning/dl-op-linalg-mathexp-benchmark.html +++ b/site/deeplearning/dl-op-linalg-mathexp-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-op-linalg-mathexp-benchmark.json

        2025-05-26 22:36:20 UTC

        +

        deeplearning/dl-op-linalg-mathexp-benchmark.json

        2025-05-26 22:41:02 UTC

        dl-op-linalg-mathexp-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/scalar/iterations:54.84.85
        diff --git a/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html b/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html old mode 100755 new mode 100644 index 33a2f718..652a8f96 --- a/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html +++ b/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-op-linalg-mathfpow-benchmark.json

        2025-05-26 22:36:20 UTC

        +

        deeplearning/dl-op-linalg-mathfpow-benchmark.json

        2025-05-26 22:41:02 UTC

        dl-op-linalg-mathfpow-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        BM_EXP_SCALAR0.00.015289
        diff --git a/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html b/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html old mode 100755 new mode 100644 index 6fa9e8ff..8c7d6d72 --- a/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html +++ b/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-op-linalg-mathrsqrt-benchmark.json

        2025-05-26 22:36:20 UTC

        +

        deeplearning/dl-op-linalg-mathrsqrt-benchmark.json

        2025-05-26 22:41:02 UTC

        dl-op-linalg-mathrsqrt-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        BM_FPOW_SCALAR0.10.18096
        diff --git a/site/deeplearning/dl-op-linalg-matmul-benchmark.html b/site/deeplearning/dl-op-linalg-matmul-benchmark.html old mode 100755 new mode 100644 index 85290002..44d5a587 --- a/site/deeplearning/dl-op-linalg-matmul-benchmark.html +++ b/site/deeplearning/dl-op-linalg-matmul-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-op-linalg-matmul-benchmark.json

        2025-05-26 22:36:20 UTC

        +

        deeplearning/dl-op-linalg-matmul-benchmark.json

        2025-05-26 22:41:02 UTC

        dl-op-linalg-matmul-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        BM_RSQRT_SCALAR0.10.19407
        diff --git a/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html b/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html old mode 100755 new mode 100644 index 578dc53c..a958f85d --- a/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html +++ b/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.json

        2025-05-26 22:36:20 UTC

        +

        deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.json

        2025-05-26 22:41:02 UTC

        dl-op-linalg-pooling-nhwc-sum-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        DL_OPS_MATMUL/scalar_O0/iterations:14219.84219.71
        diff --git a/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html b/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html old mode 100755 new mode 100644 index 08d70811..cbb9f3fb --- a/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html +++ b/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.json

        2025-05-26 22:36:20 UTC

        +

        deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.json

        2025-05-26 22:41:02 UTC

        dl-op-linalg-softmax-exp-sum-div-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        BM_POOLING_NHWC_SUM_SCALAR0.20.22972
        diff --git a/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html b/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html old mode 100755 new mode 100644 index 13995416..29d351a3 --- a/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html +++ b/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-op-matmul-transpose-b-benchmark.json

        2025-05-26 22:36:20 UTC

        +

        deeplearning/dl-op-matmul-transpose-b-benchmark.json

        2025-05-26 22:41:02 UTC

        dl-op-matmul-transpose-b-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        BM_SOFTMAXEXPSUMDIV_SCALAR0.00.0125501
        diff --git a/site/deeplearning/dl-op-tosa-transpose-benchmark.html b/site/deeplearning/dl-op-tosa-transpose-benchmark.html old mode 100755 new mode 100644 index 05a7394b..6a6cea80 --- a/site/deeplearning/dl-op-tosa-transpose-benchmark.html +++ b/site/deeplearning/dl-op-tosa-transpose-benchmark.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        deeplearning/dl-op-tosa-transpose-benchmark.json

        2025-05-26 22:36:20 UTC

        +

        deeplearning/dl-op-tosa-transpose-benchmark.json

        2025-05-26 22:41:02 UTC

        dl-op-tosa-transpose-benchmark.json

        NameTime (ns)CPU (ns)Iterations
        DL_OPS_MATMUL_TRANSPOSE_B/scalar_O0/iterations:51265.31263.65
        diff --git a/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html old mode 100755 new mode 100644 index be86d3c4..7e4128c0 --- a/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:36:20 UTC

        +

        imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:41:02 UTC

        AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        NameTime (ns)CPU (ns)Iterations
        DL_OPS_TRANSPOSE_2D/scalar_O0/iterations:529.720.95
        diff --git a/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index ebcfa10c..931ee0a4 100644 --- a/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:36:20 UTC

        +

        imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:41:02 UTC

        AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/14.94.9144
        diff --git a/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index f3c6f4ab..f51cdc17 100644 --- a/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-05-26 22:36:20 UTC

        +

        imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-05-26 22:41:02 UTC

        AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/111.811.859
        diff --git a/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html old mode 100755 new mode 100644 index 12877fc3..a804169e --- a/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:36:20 UTC

        +

        imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:41:02 UTC

        AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/111.911.960
        diff --git a/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html old mode 100755 new mode 100644 index 3afea3cb..f4c10989 --- a/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-05-26 22:36:20 UTC

        +

        imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-05-26 22:41:02 UTC

        AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/14.94.9144
        diff --git a/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html old mode 100755 new mode 100644 index c5efcd21..9b2f65b9 --- a/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:36:20 UTC

        +

        imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:41:02 UTC

        AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/15.05.0140
        diff --git a/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html old mode 100755 new mode 100644 index e2871383..9e39593b --- a/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-05-26 22:36:20 UTC

        +

        imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-05-26 22:41:02 UTC

        AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/15.05.0139
        diff --git a/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html old mode 100755 new mode 100644 index c2199d0b..1dbfa770 --- a/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:36:20 UTC

        +

        imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:41:02 UTC

        AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/15.05.0140
        diff --git a/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html old mode 100755 new mode 100644 index e0f9fe9f..b663778c --- a/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-05-26 22:36:20 UTC

        +

        imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-05-26 22:41:02 UTC

        AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/111.811.860
        diff --git a/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html old mode 100755 new mode 100644 index 10986ccb..1f562044 --- a/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:36:20 UTC

        +

        imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:41:02 UTC

        AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/111.811.859
        diff --git a/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html old mode 100755 new mode 100644 index 707e7f86..f141accd --- a/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-05-26 22:36:20 UTC

        +

        imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-05-26 22:41:02 UTC

        AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/122.222.231
        diff --git a/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html old mode 100755 new mode 100644 index 26bd0a1f..f6043071 --- a/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:36:20 UTC

        +

        imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:41:02 UTC

        AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/122.022.032
        diff --git a/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html old mode 100755 new mode 100644 index 459fc4a3..b7108d4d --- a/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-05-26 22:36:20 UTC

        +

        imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-05-26 22:41:02 UTC

        AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/135.235.220
        diff --git a/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html old mode 100755 new mode 100644 index 56dc37a8..06953992 --- a/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:36:20 UTC

        +

        imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:41:02 UTC

        SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/134.834.820
        diff --git a/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html old mode 100755 new mode 100644 index 8a59b557..85b5d8a3 --- a/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-05-26 22:36:20 UTC

        +

        imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-05-26 22:41:02 UTC

        SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/15.05.0139
        diff --git a/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html old mode 100755 new mode 100644 index 98d95517..eea020f2 --- a/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:36:20 UTC

        +

        imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:41:02 UTC

        SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/15.05.0140
        diff --git a/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html old mode 100755 new mode 100644 index ad8ba0e6..8e4999cd --- a/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-05-26 22:36:20 UTC

        +

        imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-05-26 22:41:02 UTC

        SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/111.911.960
        diff --git a/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html old mode 100755 new mode 100644 index 6786f881..84b91387 --- a/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:36:20 UTC

        +

        imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:41:02 UTC

        SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/111.611.660
        diff --git a/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html old mode 100755 new mode 100644 index dd69c474..3eca17ee --- a/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-05-26 22:36:20 UTC

        +

        imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-05-26 22:41:02 UTC

        SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/15.05.0140
        diff --git a/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html old mode 100755 new mode 100644 index 19d021e2..65782f9c --- a/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:36:20 UTC

        +

        imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:41:02 UTC

        SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/14.94.9144
        diff --git a/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html old mode 100755 new mode 100644 index 3522b04b..ef0e3d70 --- a/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-05-26 22:36:20 UTC

        +

        imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-05-26 22:41:02 UTC

        SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/14.94.9143
        diff --git a/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html old mode 100755 new mode 100644 index 6db815a3..b679d474 --- a/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:36:20 UTC

        +

        imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:41:02 UTC

        SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/14.94.9143
        diff --git a/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html old mode 100755 new mode 100644 index 9195a287..5beb7557 --- a/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-05-26 22:36:20 UTC

        +

        imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-05-26 22:41:02 UTC

        SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/111.811.859
        diff --git a/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html old mode 100755 new mode 100644 index e40bda66..c83fa86f --- a/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:36:20 UTC

        +

        imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:41:02 UTC

        SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/111.811.859
        diff --git a/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html old mode 100755 new mode 100644 index 5a325445..b830bd5a --- a/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-05-26 22:36:20 UTC

        +

        imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-05-26 22:41:02 UTC

        SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/122.022.032
        diff --git a/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html old mode 100755 new mode 100644 index 14650c07..932f4a65 --- a/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:36:20 UTC

        +

        imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:41:02 UTC

        SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/122.122.132
        diff --git a/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html old mode 100755 new mode 100644 index 3a8f5d95..472a8f61 --- a/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-05-26 22:36:20 UTC

        +

        imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-05-26 22:41:02 UTC

        SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/135.235.220
        diff --git a/site/vectorization/vectorization_matrix.html b/site/vectorization/vectorization_matrix.html old mode 100755 new mode 100644 index 49b6b158..04d171b8 --- a/site/vectorization/vectorization_matrix.html +++ b/site/vectorization/vectorization_matrix.html @@ -9,7 +9,7 @@ summary{font-weight:600;cursor:pointer} -

        vectorization/vectorization_matrix.json

        2025-05-26 22:36:20 UTC

        +

        vectorization/vectorization_matrix.json

        2025-05-26 22:41:02 UTC

        vectorization_matrix.json

        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/135.335.320
        diff --git a/test/test_result/vectorization/vectorization_result.log b/test/test_result/vectorization/vectorization_result.log deleted file mode 100755 index b0ff4d66..00000000 --- a/test/test_result/vectorization/vectorization_result.log +++ /dev/null @@ -1,6 +0,0 @@ -Vectorization Benchmark - Sun May 25 19:22:50 UTC 2025 -[Info] Running CMake configuration... -CMake Error: The source directory "/home/buddy-complier-workspace/buddy-benchmark/test" does not appear to contain CMakeLists.txt. -Specify --help for usage, or press the help button on the CMake GUI. -[Info] Building vectorization-matrix-benchmark... -ninja: error: loading 'build.ninja': No such file or directory diff --git a/test/test_script_imageprocessing.sh b/test/test_script_imageprocessing.sh index ffbffa7c..0bdf9fa1 100755 --- a/test/test_script_imageprocessing.sh +++ b/test/test_script_imageprocessing.sh @@ -50,13 +50,15 @@ for feature in "${features[@]}"; do slug="$(echo "${feature}_${img_slug}_${kern}_${morph}_${boundary}" \ | tr ' /' '__')" json_out="${RESULT_DIR}/${slug}.json" + log_out="${RESULT_DIR}/${slug}.log" # ----------------------------------------------------------------------- ./bin/image-processing-benchmark \ "$img" "$kern" "$morph" "$boundary" \ --benchmark_out="$json_out" \ --benchmark_out_format=json \ - 2>&1 | grep -v "Saved PNG file." >> "$LOG" + > "$log_out" 2>&1 + echo "[Success] …" | tee -a "$LOG" done done done diff --git a/test/test_script_vectorizationprocessing.sh b/test/test_script_vectorizationprocessing.sh index 2e081dcb..6bf34fbc 100755 --- a/test/test_script_vectorizationprocessing.sh +++ b/test/test_script_vectorizationprocessing.sh @@ -49,10 +49,14 @@ export QEMU_LD_PREFIX=/usr/riscv64-linux-gnu ################################################################################ cd bin echo "[Info] Running vectorization-matrix-benchmark..." 
| tee -a "${LOG_FILE}" +json_out="${RESULT_DIR}/vectorization_matrix.json" +log_out="${RESULT_DIR}/vectorization_matrix.log" + ./vectorization-matrix-benchmark \ - --benchmark_out="${RESULT_DIR}/vectorization_matrix.json" \ - --benchmark_out_format=json \ - 2>&1 | tee -a "${LOG_FILE}" + --benchmark_out="$json_out" \ + --benchmark_out_format=json \ + > "$log_out" 2>&1 +tee -a "$LOG_FILE" < "$log_out" echo "[Info] Benchmark completed. Log saved to ${LOG_FILE}" \ No newline at end of file From e55d8ee58b142fee983a0ac0f9c20b1cc13b6071 Mon Sep 17 00:00:00 2001 From: Q Liu <52538137+LIUQyou@users.noreply.github.com> Date: Sun, 1 Jun 2025 12:23:28 +0200 Subject: [PATCH 23/52] update the script. --- scripts/logs2html.py | 53 +- site/deeplearning/dl-layer-ffn-benchmark.html | 17 +- .../dl-layer-rmsnorm-benchmark.html | 17 +- .../dl-layer-selfattention-benchmark.html | 15 +- .../dl-model-lenet-benchmark.html | 17 +- .../dl-model-mobilenetv3-benchmark.html | 17 +- .../dl-model-resnet18-benchmark.html | 17 +- .../dl-model-tinyllama-benchmark.html | 21 +- .../dl-model-whisper-benchmark.html | 17 +- .../dl-op-linalg-arithaddf-benchmark.html | 17 +- .../dl-op-linalg-arithdivf-benchmark.html | 17 +- .../dl-op-linalg-arithmulf-benchmark.html | 17 +- .../dl-op-linalg-arithnegf-benchmark.html | 17 +- .../dl-op-linalg-arithsubf-benchmark.html | 17 +- .../dl-op-linalg-batch-matmul-benchmark.html | 35 +- ...-op-linalg-conv2d-nchw-fchw-benchmark.html | 17 +- ...-op-linalg-conv2d-nhwc-fhwc-benchmark.html | 23 +- ...-op-linalg-conv2d-nhwc-hwcf-benchmark.html | 17 +- ...-depthwise-conv-2d-nhwc-hwc-benchmark.html | 19 +- .../dl-op-linalg-mathexp-benchmark.html | 17 +- .../dl-op-linalg-mathfpow-benchmark.html | 17 +- .../dl-op-linalg-mathrsqrt-benchmark.html | 17 +- .../dl-op-linalg-matmul-benchmark.html | 29 +- ...-op-linalg-pooling-nhwc-sum-benchmark.html | 17 +- .../dl-op-linalg-reduceaddf-benchmark.html | 26 + .../dl-op-linalg-reducemaxf-benchmark.html | 26 + 
...-linalg-softmax-exp-sum-div-benchmark.html | 17 +- .../dl-op-matmul-transpose-b-benchmark.html | 25 +- .../dl-op-tosa-transpose-benchmark.html | 17 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 107 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 95 ++ ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 107 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 107 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 107 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 107 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 107 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 107 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 107 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 107 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 107 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 107 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 107 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 107 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 107 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 107 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 107 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 107 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 107 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 107 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 107 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 107 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 107 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 107 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 107 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 107 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 107 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 107 +- site/index.html | 4 + site/vectorization/vectorization_matrix.html | 32 +- .../build_results_crosscompile_summary.log | 0 .../deeplearning/build_results_summary.log | 0 .../deeplearning/dl-layer-ffn-benchmark.json | 16 +- .../deeplearning/dl-layer-ffn-benchmark.log | 8 +- 
.../dl-layer-rmsnorm-benchmark.json | 16 +- .../dl-layer-rmsnorm-benchmark.log | 8 +- .../dl-layer-selfattention-benchmark.json | 14 +- .../dl-layer-selfattention-benchmark.log | 6 +- .../dl-model-lenet-benchmark.json | 16 +- .../deeplearning/dl-model-lenet-benchmark.log | 8 +- .../dl-model-mobilenetv3-benchmark.json | 16 +- .../dl-model-mobilenetv3-benchmark.log | 8 +- .../dl-model-resnet18-benchmark.json | 12 +- .../dl-model-resnet18-benchmark.log | 8 +- .../dl-model-tinyllama-benchmark.json | 16 +- .../dl-model-tinyllama-benchmark.log | 10 +- .../dl-model-whisper-benchmark.json | 12 +- .../dl-model-whisper-benchmark.log | 8 +- .../dl-op-linalg-arithaddf-benchmark.json | 16 +- .../dl-op-linalg-arithaddf-benchmark.log | 8 +- .../dl-op-linalg-arithdivf-benchmark.json | 16 +- .../dl-op-linalg-arithdivf-benchmark.log | 8 +- .../dl-op-linalg-arithmulf-benchmark.json | 16 +- .../dl-op-linalg-arithmulf-benchmark.log | 8 +- .../dl-op-linalg-arithnegf-benchmark.json | 16 +- .../dl-op-linalg-arithnegf-benchmark.log | 8 +- .../dl-op-linalg-arithsubf-benchmark.json | 16 +- .../dl-op-linalg-arithsubf-benchmark.log | 8 +- .../dl-op-linalg-batch-matmul-benchmark.json | 32 +- .../dl-op-linalg-batch-matmul-benchmark.log | 16 +- ...-op-linalg-conv2d-nchw-fchw-benchmark.json | 14 +- ...l-op-linalg-conv2d-nchw-fchw-benchmark.log | 8 +- ...-op-linalg-conv2d-nhwc-fhwc-benchmark.json | 20 +- ...l-op-linalg-conv2d-nhwc-fhwc-benchmark.log | 10 +- ...-op-linalg-conv2d-nhwc-hwcf-benchmark.json | 16 +- ...l-op-linalg-conv2d-nhwc-hwcf-benchmark.log | 8 +- ...-depthwise-conv-2d-nhwc-hwc-benchmark.json | 16 +- ...g-depthwise-conv-2d-nhwc-hwc-benchmark.log | 8 +- .../dl-op-linalg-mathexp-benchmark.json | 16 +- .../dl-op-linalg-mathexp-benchmark.log | 8 +- .../dl-op-linalg-mathfpow-benchmark.json | 16 +- .../dl-op-linalg-mathfpow-benchmark.log | 8 +- .../dl-op-linalg-mathrsqrt-benchmark.json | 16 +- .../dl-op-linalg-mathrsqrt-benchmark.log | 8 +- .../dl-op-linalg-matmul-benchmark.json | 24 +- 
.../dl-op-linalg-matmul-benchmark.log | 14 +- ...-op-linalg-pooling-nhwc-sum-benchmark.json | 16 +- ...l-op-linalg-pooling-nhwc-sum-benchmark.log | 8 +- .../dl-op-linalg-reduceaddf-benchmark.json | 38 + .../dl-op-linalg-reduceaddf-benchmark.log | 4 +- .../dl-op-linalg-reducemaxf-benchmark.json | 38 + .../dl-op-linalg-reducemaxf-benchmark.log | 4 +- ...-linalg-softmax-exp-sum-div-benchmark.json | 16 +- ...p-linalg-softmax-exp-sum-div-benchmark.log | 8 +- .../dl-op-matmul-transpose-b-benchmark.json | 20 +- .../dl-op-matmul-transpose-b-benchmark.log | 12 +- .../dl-op-tosa-transpose-benchmark.json | 12 +- .../dl-op-tosa-transpose-benchmark.log | 8 +- .../deeplearning/run_results_summary.log | 0 ...dom3x3KernelAlignInt_CONSTANT_PADDING.json | 136 +-- ...ndom3x3KernelAlignInt_CONSTANT_PADDING.log | 56 + ...om3x3KernelAlignInt_REPLICATE_PADDING.json | 348 ++++++ ...dom3x3KernelAlignInt_REPLICATE_PADDING.log | 56 + ...dom3x3KernelAlignInt_CONSTANT_PADDING.json | 136 +-- ...ndom3x3KernelAlignInt_CONSTANT_PADDING.log | 56 + ...om3x3KernelAlignInt_REPLICATE_PADDING.json | 136 +-- ...dom3x3KernelAlignInt_REPLICATE_PADDING.log | 56 + ...dom3x3KernelAlignInt_CONSTANT_PADDING.json | 132 +-- ...ndom3x3KernelAlignInt_CONSTANT_PADDING.log | 56 + ...om3x3KernelAlignInt_REPLICATE_PADDING.json | 136 +-- ...dom3x3KernelAlignInt_REPLICATE_PADDING.log | 56 + ...dom3x3KernelAlignInt_CONSTANT_PADDING.json | 136 +-- ...ndom3x3KernelAlignInt_CONSTANT_PADDING.log | 56 + ...om3x3KernelAlignInt_REPLICATE_PADDING.json | 136 +-- ...dom3x3KernelAlignInt_REPLICATE_PADDING.log | 56 + ...dom3x3KernelAlignInt_CONSTANT_PADDING.json | 134 +-- ...ndom3x3KernelAlignInt_CONSTANT_PADDING.log | 56 + ...om3x3KernelAlignInt_REPLICATE_PADDING.json | 134 +-- ...dom3x3KernelAlignInt_REPLICATE_PADDING.log | 56 + ...dom3x3KernelAlignInt_CONSTANT_PADDING.json | 134 +-- ...ndom3x3KernelAlignInt_CONSTANT_PADDING.log | 56 + ...om3x3KernelAlignInt_REPLICATE_PADDING.json | 132 +-- 
...dom3x3KernelAlignInt_REPLICATE_PADDING.log | 56 + ...dom3x3KernelAlignInt_CONSTANT_PADDING.json | 134 +-- ...ndom3x3KernelAlignInt_CONSTANT_PADDING.log | 56 + ...om3x3KernelAlignInt_REPLICATE_PADDING.json | 132 +-- ...dom3x3KernelAlignInt_REPLICATE_PADDING.log | 56 + ...dom3x3KernelAlignInt_CONSTANT_PADDING.json | 136 +-- ...ndom3x3KernelAlignInt_CONSTANT_PADDING.log | 56 + ...om3x3KernelAlignInt_REPLICATE_PADDING.json | 136 +-- ...dom3x3KernelAlignInt_REPLICATE_PADDING.log | 56 + ...dom3x3KernelAlignInt_CONSTANT_PADDING.json | 134 +-- ...ndom3x3KernelAlignInt_CONSTANT_PADDING.log | 56 + ...om3x3KernelAlignInt_REPLICATE_PADDING.json | 136 +-- ...dom3x3KernelAlignInt_REPLICATE_PADDING.log | 56 + ...dom3x3KernelAlignInt_CONSTANT_PADDING.json | 134 +-- ...ndom3x3KernelAlignInt_CONSTANT_PADDING.log | 56 + ...om3x3KernelAlignInt_REPLICATE_PADDING.json | 136 +-- ...dom3x3KernelAlignInt_REPLICATE_PADDING.log | 56 + ...dom3x3KernelAlignInt_CONSTANT_PADDING.json | 134 +-- ...ndom3x3KernelAlignInt_CONSTANT_PADDING.log | 56 + ...om3x3KernelAlignInt_REPLICATE_PADDING.json | 136 +-- ...dom3x3KernelAlignInt_REPLICATE_PADDING.log | 56 + ...dom3x3KernelAlignInt_CONSTANT_PADDING.json | 134 +-- ...ndom3x3KernelAlignInt_CONSTANT_PADDING.log | 56 + ...om3x3KernelAlignInt_REPLICATE_PADDING.json | 136 +-- ...dom3x3KernelAlignInt_REPLICATE_PADDING.log | 56 + ...dom3x3KernelAlignInt_CONSTANT_PADDING.json | 136 +-- ...ndom3x3KernelAlignInt_CONSTANT_PADDING.log | 56 + ...om3x3KernelAlignInt_REPLICATE_PADDING.json | 134 +-- ...dom3x3KernelAlignInt_REPLICATE_PADDING.log | 56 + ...dom3x3KernelAlignInt_CONSTANT_PADDING.json | 136 +-- ...ndom3x3KernelAlignInt_CONSTANT_PADDING.log | 56 + ...om3x3KernelAlignInt_REPLICATE_PADDING.json | 132 +-- ...dom3x3KernelAlignInt_REPLICATE_PADDING.log | 56 + .../image-processing-result.log | 992 +----------------- .../vectorization/vectorization_matrix.json | 16 +- .../vectorization/vectorization_matrix.log | 21 + .../vectorization/vectorization_result.log 
| 44 +- 178 files changed, 6942 insertions(+), 4044 deletions(-) create mode 100644 site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html create mode 100644 site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html create mode 100644 site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html mode change 100755 => 100644 test_result/deeplearning/build_results_crosscompile_summary.log mode change 100755 => 100644 test_result/deeplearning/build_results_summary.log mode change 100755 => 100644 test_result/deeplearning/dl-layer-ffn-benchmark.json mode change 100755 => 100644 test_result/deeplearning/dl-layer-ffn-benchmark.log mode change 100755 => 100644 test_result/deeplearning/dl-layer-rmsnorm-benchmark.json mode change 100755 => 100644 test_result/deeplearning/dl-layer-rmsnorm-benchmark.log mode change 100755 => 100644 test_result/deeplearning/dl-layer-selfattention-benchmark.json mode change 100755 => 100644 test_result/deeplearning/dl-layer-selfattention-benchmark.log mode change 100755 => 100644 test_result/deeplearning/dl-model-lenet-benchmark.json mode change 100755 => 100644 test_result/deeplearning/dl-model-lenet-benchmark.log mode change 100755 => 100644 test_result/deeplearning/dl-model-mobilenetv3-benchmark.json mode change 100755 => 100644 test_result/deeplearning/dl-model-mobilenetv3-benchmark.log mode change 100755 => 100644 test_result/deeplearning/dl-model-resnet18-benchmark.json mode change 100755 => 100644 test_result/deeplearning/dl-model-resnet18-benchmark.log mode change 100755 => 100644 test_result/deeplearning/dl-model-tinyllama-benchmark.json mode change 100755 => 100644 test_result/deeplearning/dl-model-tinyllama-benchmark.log mode change 100755 => 100644 test_result/deeplearning/dl-model-whisper-benchmark.json mode change 100755 => 100644 test_result/deeplearning/dl-model-whisper-benchmark.log mode change 100755 => 100644 test_result/deeplearning/dl-op-linalg-arithaddf-benchmark.json mode change 
100755 => 100644 test_result/deeplearning/dl-op-linalg-arithaddf-benchmark.log mode change 100755 => 100644 test_result/deeplearning/dl-op-linalg-arithdivf-benchmark.json mode change 100755 => 100644 test_result/deeplearning/dl-op-linalg-arithdivf-benchmark.log mode change 100755 => 100644 test_result/deeplearning/dl-op-linalg-arithmulf-benchmark.json mode change 100755 => 100644 test_result/deeplearning/dl-op-linalg-arithmulf-benchmark.log mode change 100755 => 100644 test_result/deeplearning/dl-op-linalg-arithnegf-benchmark.json mode change 100755 => 100644 test_result/deeplearning/dl-op-linalg-arithnegf-benchmark.log mode change 100755 => 100644 test_result/deeplearning/dl-op-linalg-arithsubf-benchmark.json mode change 100755 => 100644 test_result/deeplearning/dl-op-linalg-arithsubf-benchmark.log mode change 100755 => 100644 test_result/deeplearning/dl-op-linalg-batch-matmul-benchmark.json mode change 100755 => 100644 test_result/deeplearning/dl-op-linalg-batch-matmul-benchmark.log mode change 100755 => 100644 test_result/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.json mode change 100755 => 100644 test_result/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.log mode change 100755 => 100644 test_result/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json mode change 100755 => 100644 test_result/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.log mode change 100755 => 100644 test_result/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json mode change 100755 => 100644 test_result/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.log mode change 100755 => 100644 test_result/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json mode change 100755 => 100644 test_result/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.log mode change 100755 => 100644 test_result/deeplearning/dl-op-linalg-mathexp-benchmark.json mode change 100755 => 100644 test_result/deeplearning/dl-op-linalg-mathexp-benchmark.log mode change 
100755 => 100644 test_result/deeplearning/dl-op-linalg-mathfpow-benchmark.json mode change 100755 => 100644 test_result/deeplearning/dl-op-linalg-mathfpow-benchmark.log mode change 100755 => 100644 test_result/deeplearning/dl-op-linalg-mathrsqrt-benchmark.json mode change 100755 => 100644 test_result/deeplearning/dl-op-linalg-mathrsqrt-benchmark.log mode change 100755 => 100644 test_result/deeplearning/dl-op-linalg-matmul-benchmark.json mode change 100755 => 100644 test_result/deeplearning/dl-op-linalg-matmul-benchmark.log mode change 100755 => 100644 test_result/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.json mode change 100755 => 100644 test_result/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.log create mode 100644 test_result/deeplearning/dl-op-linalg-reduceaddf-benchmark.json mode change 100755 => 100644 test_result/deeplearning/dl-op-linalg-reduceaddf-benchmark.log create mode 100644 test_result/deeplearning/dl-op-linalg-reducemaxf-benchmark.json mode change 100755 => 100644 test_result/deeplearning/dl-op-linalg-reducemaxf-benchmark.log mode change 100755 => 100644 test_result/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.json mode change 100755 => 100644 test_result/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.log mode change 100755 => 100644 test_result/deeplearning/dl-op-matmul-transpose-b-benchmark.json mode change 100755 => 100644 test_result/deeplearning/dl-op-matmul-transpose-b-benchmark.log mode change 100755 => 100644 test_result/deeplearning/dl-op-tosa-transpose-benchmark.json mode change 100755 => 100644 test_result/deeplearning/dl-op-tosa-transpose-benchmark.log mode change 100755 => 100644 test_result/deeplearning/run_results_summary.log mode change 100755 => 100644 test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json create mode 100644 test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log create mode 100644 
test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json create mode 100644 test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log mode change 100755 => 100644 test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json create mode 100644 test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log mode change 100755 => 100644 test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json create mode 100644 test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log mode change 100755 => 100644 test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json create mode 100644 test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log mode change 100755 => 100644 test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json create mode 100644 test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log mode change 100755 => 100644 test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json create mode 100644 test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log mode change 100755 => 100644 test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json create mode 100644 test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log mode change 100755 => 100644 test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json create mode 100644 test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log 
mode change 100755 => 100644 test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json create mode 100644 test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log mode change 100755 => 100644 test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json create mode 100644 test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log mode change 100755 => 100644 test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json create mode 100644 test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log mode change 100755 => 100644 test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json create mode 100644 test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log mode change 100755 => 100644 test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json create mode 100644 test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log mode change 100755 => 100644 test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json create mode 100644 test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log mode change 100755 => 100644 test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json create mode 100644 test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log mode change 100755 => 100644 test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json create mode 100644 
test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log mode change 100755 => 100644 test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json create mode 100644 test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log mode change 100755 => 100644 test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json create mode 100644 test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log mode change 100755 => 100644 test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json create mode 100644 test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log mode change 100755 => 100644 test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json create mode 100644 test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log mode change 100755 => 100644 test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json create mode 100644 test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log mode change 100755 => 100644 test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json create mode 100644 test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log mode change 100755 => 100644 test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json create mode 100644 test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log mode change 100755 => 100644 test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json 
create mode 100644 test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log mode change 100755 => 100644 test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json create mode 100644 test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log mode change 100755 => 100644 test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json create mode 100644 test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log mode change 100755 => 100644 test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json create mode 100644 test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log mode change 100755 => 100644 test_result/imageprocessing/image-processing-result.log mode change 100755 => 100644 test_result/vectorization/vectorization_matrix.json create mode 100644 test_result/vectorization/vectorization_matrix.log mode change 100755 => 100644 test_result/vectorization/vectorization_result.log diff --git a/scripts/logs2html.py b/scripts/logs2html.py index ee44e977..90053c4c 100755 --- a/scripts/logs2html.py +++ b/scripts/logs2html.py @@ -2,9 +2,13 @@ """ Turn every *.json under into /.html. If a twin *.log exists (same stem), show it in a collapsible
        . +If the JSON is unreadable, generate a red “FAILED” page instead of aborting. """ -import html, json, pathlib, datetime, sys +import html, json, pathlib, datetime, sys, traceback + +class BrokenJSON(RuntimeError): + pass src, dst = map(pathlib.Path, sys.argv[1:3]) dst.mkdir(parents=True, exist_ok=True) @@ -19,15 +23,29 @@ tr:nth-child(even){background:#fafafa} details{border:1px solid #ccc;border-radius:.4rem;padding:.6rem} summary{font-weight:600;cursor:pointer} +.err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} """ def gbench_json_to_table(js_path: pathlib.Path) -> str: - data = json.loads(js_path.read_text())["benchmarks"] + """Turn one Google-Benchmark JSON file into an HTML
        NameTime (ns)CPU (ns)Iterations
        MLIR_MatMul/119.519.535685429
        .""" + try: + payload = json.loads(js_path.read_text()) + except json.JSONDecodeError as e: + raise BrokenJSON(f"JSON parse error: {e.msg}") from e + + if "benchmarks" not in payload: + raise BrokenJSON("Missing top-level ‘benchmarks’ array") + + data = payload["benchmarks"] + if not data: + raise BrokenJSON("Empty ‘benchmarks’ array") - # pick the first “real” iteration row to read the time_unit - first = next(b for b in data if b.get("run_type") == "iteration") - unit = html.escape(first.get("time_unit", "ns")) + first = next((b for b in data if b.get("run_type") == "iteration"), None) + if not first: + raise BrokenJSON("No ‘iteration’ rows found") + + unit = html.escape(first.get("time_unit", "ns")) head = (f"" f"") @@ -38,25 +56,30 @@ def gbench_json_to_table(js_path: pathlib.Path) -> str: f"" f"" for b in data - if b.get("run_type") == "iteration" # ignore _mean, _stddev + if b.get("run_type") == "iteration" ) - return f"

        {js_path.name}

        \n
        NameTime ({unit})CPU ({unit})Iterations
        {b['cpu_time']:.3g}{b['iterations']:,}
        {head}\n{rows}
        " # --------------------------------------------------------------------------- for js in src.rglob("*.json"): print("→ parsing", js) - log = js.with_suffix(".log") # same stem, optional + log = js.with_suffix(".log") rel = js.relative_to(src) page = dst / rel.with_suffix(".html") page.parent.mkdir(parents=True, exist_ok=True) - body = [CSS, - f"

        {rel}

        {stamp}

        ", - gbench_json_to_table(js)] + body = [CSS, f"

        {rel}

        {stamp}

        "] + + try: + body.append(gbench_json_to_table(js)) + except (BrokenJSON, json.JSONDecodeError) as err: + # Build a failure stub but keep the run going + body.append(f"
        ⚠ FAILED: " + f"{html.escape(str(err))}
        ") - if log.exists(): # include console output if present + # Always embed the console log if available + if log.exists(): body.append("
        Console output\n" f"
        {html.escape(log.read_text())}
        ") @@ -68,6 +91,8 @@ def gbench_json_to_table(js_path: pathlib.Path) -> str: links = "\n".join( f'
      • ' f'{p.relative_to(dst).as_posix()}
      • ' - for p in sorted(dst.rglob("*.html")) if p.name != "index.html" + for p in sorted(dst.rglob("*.html")) + if p.name != "index.html" ) -(dst / "index.html").write_text(CSS + f"

        Buddy-Benchmark results

          \n{links}\n
        ") +(dst / "index.html").write_text( + CSS + "

        Buddy-Benchmark results

          \n" + links + "\n
        ") diff --git a/site/deeplearning/dl-layer-ffn-benchmark.html b/site/deeplearning/dl-layer-ffn-benchmark.html index 554a7ada..7dd67bee 100644 --- a/site/deeplearning/dl-layer-ffn-benchmark.html +++ b/site/deeplearning/dl-layer-ffn-benchmark.html @@ -7,15 +7,16 @@ tr:nth-child(even){background:#fafafa} details{border:1px solid #ccc;border-radius:.4rem;padding:.6rem} summary{font-weight:600;cursor:pointer} +.err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

        deeplearning/dl-layer-ffn-benchmark.json

        2025-05-26 22:41:02 UTC

        +

        deeplearning/dl-layer-ffn-benchmark.json

        2025-06-01 10:22:14 UTC

        dl-layer-ffn-benchmark.json

        - - -
        NameTime (ns)CPU (ns)Iterations
        DL_LAYER_FFN/Scalar0.10.110758
        DL_LAYER_FFN/Auto_Vectorization0.00.025878
        + + +
        NameTime (ms)CPU (ms)Iterations
        DL_LAYER_FFN/Scalar0.06530.065310,788
        DL_LAYER_FFN/Auto_Vectorization0.0270.02725,830
        Console output -
        2025-05-26T21:12:34+00:00
        +
        2025-06-01T09:43:17+00:00
         Running ./dl-layer-ffn-benchmark
         Run on (24 X 5100 MHz CPU s)
         CPU Caches:
        @@ -23,13 +24,13 @@ 

        dl-layer-ffn-benchmark.json

        L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.20, 1.96 +Load Average: 1.02, 1.93, 3.98 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. -------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------- -DL_LAYER_FFN/Scalar 0.065 ms 0.065 ms 10758 -DL_LAYER_FFN/Auto_Vectorization 0.027 ms 0.027 ms 25878 +DL_LAYER_FFN/Scalar 0.065 ms 0.065 ms 10788 +DL_LAYER_FFN/Auto_Vectorization 0.027 ms 0.027 ms 25830 ----------------------------------------------------------- Correctness Verification: PASS ----------------------------------------------------------- diff --git a/site/deeplearning/dl-layer-rmsnorm-benchmark.html b/site/deeplearning/dl-layer-rmsnorm-benchmark.html index dfdce6c5..9a9c019c 100644 --- a/site/deeplearning/dl-layer-rmsnorm-benchmark.html +++ b/site/deeplearning/dl-layer-rmsnorm-benchmark.html @@ -7,15 +7,16 @@ tr:nth-child(even){background:#fafafa} details{border:1px solid #ccc;border-radius:.4rem;padding:.6rem} summary{font-weight:600;cursor:pointer} +.err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

        deeplearning/dl-layer-rmsnorm-benchmark.json

        2025-05-26 22:41:02 UTC

        +

        deeplearning/dl-layer-rmsnorm-benchmark.json

        2025-06-01 10:22:14 UTC

        dl-layer-rmsnorm-benchmark.json

        - - -
        NameTime (ns)CPU (ns)Iterations
        DL_LAYER_RMSNORM/Scalar0.00.0358748
        DL_LAYER_RMSNORM/Auto_Vectorization0.00.0753724
        + + +
        NameTime (ms)CPU (ms)Iterations
        DL_LAYER_RMSNORM/Scalar0.001960.00196355,522
        DL_LAYER_RMSNORM/Auto_Vectorization0.0009070.000907763,038
        Console output -
        2025-05-26T21:12:38+00:00
        +
        2025-06-01T09:43:21+00:00
         Running ./dl-layer-rmsnorm-benchmark
         Run on (24 X 5100 MHz CPU s)
         CPU Caches:
        @@ -23,13 +24,13 @@ 

        dl-layer-rmsnorm-benchmark.json

        L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.19, 1.95 +Load Average: 1.02, 1.93, 3.98 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. ------------------------------------------------------------------------------ Benchmark Time CPU Iterations ------------------------------------------------------------------------------ -DL_LAYER_RMSNORM/Scalar 0.002 ms 0.002 ms 358748 -DL_LAYER_RMSNORM/Auto_Vectorization 0.001 ms 0.001 ms 753724 +DL_LAYER_RMSNORM/Scalar 0.002 ms 0.002 ms 355522 +DL_LAYER_RMSNORM/Auto_Vectorization 0.001 ms 0.001 ms 763038 ----------------------------------------------------------- Correctness Verification: PASS ----------------------------------------------------------- diff --git a/site/deeplearning/dl-layer-selfattention-benchmark.html b/site/deeplearning/dl-layer-selfattention-benchmark.html index 23bcb770..aef64b00 100644 --- a/site/deeplearning/dl-layer-selfattention-benchmark.html +++ b/site/deeplearning/dl-layer-selfattention-benchmark.html @@ -7,15 +7,16 @@ tr:nth-child(even){background:#fafafa} details{border:1px solid #ccc;border-radius:.4rem;padding:.6rem} summary{font-weight:600;cursor:pointer} +.err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

        deeplearning/dl-layer-selfattention-benchmark.json

        2025-05-26 22:41:02 UTC

        +

        deeplearning/dl-layer-selfattention-benchmark.json

        2025-06-01 10:22:14 UTC

        dl-layer-selfattention-benchmark.json

        - - -
        NameTime (ns)CPU (ns)Iterations
        DL_LAYER_ATTENTION/Scalar4.74.7148
        DL_LAYER_ATTENTION/Auto_Vectorization1.61.6447
        + + +
        NameTime (ms)CPU (ms)Iterations
        DL_LAYER_ATTENTION/Scalar4.694.69149
        DL_LAYER_ATTENTION/Auto_Vectorization1.571.57447
        Console output -
        2025-05-26T21:12:36+00:00
        +
        2025-06-01T09:43:19+00:00
         Running ./dl-layer-selfattention-benchmark
         Run on (24 X 5100 MHz CPU s)
         CPU Caches:
        @@ -23,12 +24,12 @@ 

        dl-layer-selfattention-benchmark.json

        L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.19, 1.95 +Load Average: 1.02, 1.93, 3.98 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. -------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------- -DL_LAYER_ATTENTION/Scalar 4.71 ms 4.71 ms 148 +DL_LAYER_ATTENTION/Scalar 4.69 ms 4.69 ms 149 DL_LAYER_ATTENTION/Auto_Vectorization 1.57 ms 1.57 ms 447 ----------------------------------------------------------- Correctness Verification: PASS diff --git a/site/deeplearning/dl-model-lenet-benchmark.html b/site/deeplearning/dl-model-lenet-benchmark.html index 48de8cee..ad337c18 100644 --- a/site/deeplearning/dl-model-lenet-benchmark.html +++ b/site/deeplearning/dl-model-lenet-benchmark.html @@ -7,15 +7,16 @@ tr:nth-child(even){background:#fafafa} details{border:1px solid #ccc;border-radius:.4rem;padding:.6rem} summary{font-weight:600;cursor:pointer} +.err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

        deeplearning/dl-model-lenet-benchmark.json

        2025-05-26 22:41:02 UTC

        +

        deeplearning/dl-model-lenet-benchmark.json

        2025-06-01 10:22:14 UTC

        dl-model-lenet-benchmark.json

        - - -
        NameTime (ns)CPU (ns)Iterations
        DL_MODEL_LENET/Auto_Vectorization0.20.24396
        DL_MODEL_LENET/Buddy_Vectorization0.10.15074
        + + +
        NameTime (ms)CPU (ms)Iterations
        DL_MODEL_LENET/Auto_Vectorization0.1610.1614,427
        DL_MODEL_LENET/Buddy_Vectorization0.1360.1365,106
        Console output -
        2025-05-26T21:08:36+00:00
        +
        2025-06-01T09:39:21+00:00
         Running ./dl-model-lenet-benchmark
         Run on (24 X 5100 MHz CPU s)
         CPU Caches:
        @@ -23,13 +24,13 @@ 

        dl-model-lenet-benchmark.json

        L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.37, 1.45, 2.25 +Load Average: 2.00, 3.05, 4.86 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. ----------------------------------------------------------------------------- Benchmark Time CPU Iterations ----------------------------------------------------------------------------- -DL_MODEL_LENET/Auto_Vectorization 0.155 ms 0.155 ms 4396 -DL_MODEL_LENET/Buddy_Vectorization 0.137 ms 0.137 ms 5074 +DL_MODEL_LENET/Auto_Vectorization 0.161 ms 0.161 ms 4427 +DL_MODEL_LENET/Buddy_Vectorization 0.136 ms 0.136 ms 5106 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/site/deeplearning/dl-model-mobilenetv3-benchmark.html b/site/deeplearning/dl-model-mobilenetv3-benchmark.html index 42aab51e..34c6a586 100644 --- a/site/deeplearning/dl-model-mobilenetv3-benchmark.html +++ b/site/deeplearning/dl-model-mobilenetv3-benchmark.html @@ -7,15 +7,16 @@ tr:nth-child(even){background:#fafafa} details{border:1px solid #ccc;border-radius:.4rem;padding:.6rem} summary{font-weight:600;cursor:pointer} +.err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

        deeplearning/dl-model-mobilenetv3-benchmark.json

        2025-05-26 22:41:02 UTC

        +

        deeplearning/dl-model-mobilenetv3-benchmark.json

        2025-06-01 10:22:14 UTC

        dl-model-mobilenetv3-benchmark.json

        - - -
        NameTime (ns)CPU (ns)Iterations
        BM_MobileNet_V3/BM_MobileNet_V3_scalar35.335.320
        BM_MobileNet_V3/BM_MobileNet_V3_conv_opt32.232.221
        + + +
        NameTime (ms)CPU (ms)Iterations
        BM_MobileNet_V3/BM_MobileNet_V3_scalar35.935.918
        BM_MobileNet_V3/BM_MobileNet_V3_conv_opt32.732.722
        Console output -
        2025-05-26T21:08:34+00:00
        +
        2025-06-01T09:39:18+00:00
         Running ./dl-model-mobilenetv3-benchmark
         Run on (24 X 5100 MHz CPU s)
         CPU Caches:
        @@ -23,13 +24,13 @@ 

        dl-model-mobilenetv3-benchmark.json

        L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.40, 1.46, 2.26 +Load Average: 2.00, 3.05, 4.86 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. ----------------------------------------------------------------------------------- Benchmark Time CPU Iterations ----------------------------------------------------------------------------------- -BM_MobileNet_V3/BM_MobileNet_V3_scalar 35.3 ms 35.3 ms 20 -BM_MobileNet_V3/BM_MobileNet_V3_conv_opt 32.2 ms 32.2 ms 21 +BM_MobileNet_V3/BM_MobileNet_V3_scalar 35.9 ms 35.9 ms 18 +BM_MobileNet_V3/BM_MobileNet_V3_conv_opt 32.7 ms 32.7 ms 22 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/site/deeplearning/dl-model-resnet18-benchmark.html b/site/deeplearning/dl-model-resnet18-benchmark.html index f772d281..150e7970 100644 --- a/site/deeplearning/dl-model-resnet18-benchmark.html +++ b/site/deeplearning/dl-model-resnet18-benchmark.html @@ -7,15 +7,16 @@ tr:nth-child(even){background:#fafafa} details{border:1px solid #ccc;border-radius:.4rem;padding:.6rem} summary{font-weight:600;cursor:pointer} +.err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

        deeplearning/dl-model-resnet18-benchmark.json

        2025-05-26 22:41:02 UTC

        +

        deeplearning/dl-model-resnet18-benchmark.json

        2025-06-01 10:22:14 UTC

        dl-model-resnet18-benchmark.json

        - - -
        NameTime (ns)CPU (ns)Iterations
        DL_MODEL_Resnet18/Auto_Vectorization725.8717.91
        DL_MODEL_Resnet18/Buddy_Vectorization723.3723.31
        + + +
        NameTime (ms)CPU (ms)Iterations
        DL_MODEL_Resnet18/Auto_Vectorization7197181
        DL_MODEL_Resnet18/Buddy_Vectorization7267181
        Console output -
        2025-05-26T21:12:31+00:00
        +
        2025-06-01T09:43:14+00:00
         Running ./dl-model-resnet18-benchmark
         Run on (24 X 5100 MHz CPU s)
         CPU Caches:
        @@ -23,13 +24,13 @@ 

        dl-model-resnet18-benchmark.json

        L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.20, 1.96 +Load Average: 1.02, 1.94, 4.00 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. -------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------- -DL_MODEL_Resnet18/Auto_Vectorization 726 ms 718 ms 1 -DL_MODEL_Resnet18/Buddy_Vectorization 723 ms 723 ms 1 +DL_MODEL_Resnet18/Auto_Vectorization 719 ms 718 ms 1 +DL_MODEL_Resnet18/Buddy_Vectorization 726 ms 718 ms 1 ----------------------------------------------------------- Correctness Verification: PASS ----------------------------------------------------------- diff --git a/site/deeplearning/dl-model-tinyllama-benchmark.html b/site/deeplearning/dl-model-tinyllama-benchmark.html index 2f7b58d9..f834f3b1 100644 --- a/site/deeplearning/dl-model-tinyllama-benchmark.html +++ b/site/deeplearning/dl-model-tinyllama-benchmark.html @@ -7,16 +7,17 @@ tr:nth-child(even){background:#fafafa} details{border:1px solid #ccc;border-radius:.4rem;padding:.6rem} summary{font-weight:600;cursor:pointer} +.err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

        deeplearning/dl-model-tinyllama-benchmark.json

        2025-05-26 22:41:02 UTC

        +

        deeplearning/dl-model-tinyllama-benchmark.json

        2025-06-01 10:22:14 UTC

        dl-model-tinyllama-benchmark.json

        - - - -
        NameTime (ns)CPU (ns)Iterations
        DL_MODEL_TINYLLAMA/scalar145312.6145306.01
        DL_MODEL_TINYLLAMA/matmul_opt9843.29842.81
        DL_MODEL_TINYLLAMA/matmul_opt_omp7800.47157.21
        + + + +
        NameTime (ms)CPU (ms)Iterations
        DL_MODEL_TINYLLAMA/scalar1.73e+051.73e+051
        DL_MODEL_TINYLLAMA/matmul_opt1.05e+041.05e+041
        DL_MODEL_TINYLLAMA/matmul_opt_omp8.22e+037.61e+031
        Console output -
        2025-05-26T21:03:18+00:00
        +
        2025-06-01T09:33:00+00:00
         Running ./dl-model-tinyllama-benchmark
         Run on (24 X 5100 MHz CPU s)
         CPU Caches:
        @@ -24,14 +25,14 @@ 

        dl-model-tinyllama-benchmark.json

        L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.60, 2.10, 2.74 +Load Average: 2.76, 5.10, 6.18 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. ---------------------------------------------------------------------------- Benchmark Time CPU Iterations ---------------------------------------------------------------------------- -DL_MODEL_TINYLLAMA/scalar 145313 ms 145306 ms 1 -DL_MODEL_TINYLLAMA/matmul_opt 9843 ms 9843 ms 1 -DL_MODEL_TINYLLAMA/matmul_opt_omp 7800 ms 7157 ms 1 +DL_MODEL_TINYLLAMA/scalar 172638 ms 172634 ms 1 +DL_MODEL_TINYLLAMA/matmul_opt 10491 ms 10491 ms 1 +DL_MODEL_TINYLLAMA/matmul_opt_omp 8219 ms 7607 ms 1 ---------- Verification ---------- matmul_opt PASS matmul_opt_omp PASS diff --git a/site/deeplearning/dl-model-whisper-benchmark.html b/site/deeplearning/dl-model-whisper-benchmark.html index bf0249ff..0f2ff8c8 100644 --- a/site/deeplearning/dl-model-whisper-benchmark.html +++ b/site/deeplearning/dl-model-whisper-benchmark.html @@ -7,15 +7,16 @@ tr:nth-child(even){background:#fafafa} details{border:1px solid #ccc;border-radius:.4rem;padding:.6rem} summary{font-weight:600;cursor:pointer} +.err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

        deeplearning/dl-model-whisper-benchmark.json

        2025-05-26 22:41:02 UTC

        +

        deeplearning/dl-model-whisper-benchmark.json

        2025-06-01 10:22:14 UTC

        dl-model-whisper-benchmark.json

        - - -
        NameTime (ns)CPU (ns)Iterations
        DL_MODEL_Whisper/Auto_Vectorization80864.480855.31
        DL_MODEL_Whisper/Buddy_Vectorization35875.835871.51
        + + +
        NameTime (ms)CPU (ms)Iterations
        DL_MODEL_Whisper/Auto_Vectorization7.92e+047.92e+041
        DL_MODEL_Whisper/Buddy_Vectorization3.69e+043.69e+041
        Console output -
        2025-05-26T21:08:38+00:00
        +
        2025-06-01T09:39:22+00:00
         Running ./dl-model-whisper-benchmark
         Run on (24 X 5100 MHz CPU s)
         CPU Caches:
        @@ -23,13 +24,13 @@ 

        dl-model-whisper-benchmark.json

        L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.37, 1.45, 2.25 +Load Average: 2.00, 3.04, 4.84 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. ------------------------------------------------------------------------------- Benchmark Time CPU Iterations ------------------------------------------------------------------------------- -DL_MODEL_Whisper/Auto_Vectorization 80864 ms 80855 ms 1 -DL_MODEL_Whisper/Buddy_Vectorization 35876 ms 35871 ms 1 +DL_MODEL_Whisper/Auto_Vectorization 79216 ms 79213 ms 1 +DL_MODEL_Whisper/Buddy_Vectorization 36910 ms 36904 ms 1 ----------------------------------------------------------- Correctness Verification for Output1: PASS Correctness Verification for Output2: FAIL diff --git a/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html b/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html index e2bc4e0c..bd9433e9 100644 --- a/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html @@ -7,15 +7,16 @@ tr:nth-child(even){background:#fafafa} details{border:1px solid #ccc;border-radius:.4rem;padding:.6rem} summary{font-weight:600;cursor:pointer} +.err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

        deeplearning/dl-op-linalg-arithaddf-benchmark.json

        2025-05-26 22:41:02 UTC

        +

        deeplearning/dl-op-linalg-arithaddf-benchmark.json

        2025-06-01 10:22:14 UTC

        dl-op-linalg-arithaddf-benchmark.json

        - - -
        NameTime (ns)CPU (ns)Iterations
        BM_ADDF_SCALAR0.00.023951
        BM_ADDF_AutoVectorization0.00.0174606
        + + +
        NameTime (ms)CPU (ms)Iterations
        BM_ADDF_SCALAR0.02990.029923,357
        BM_ADDF_AutoVectorization0.003990.00399164,695
        Console output -
        2025-05-26T21:13:08+00:00
        +
        2025-06-01T09:43:50+00:00
         Running ./dl-op-linalg-arithaddf-benchmark
         Run on (24 X 5100 MHz CPU s)
         CPU Caches:
        @@ -23,13 +24,13 @@ 

        dl-op-linalg-arithaddf-benchmark.json

        L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.17, 1.92 +Load Average: 1.01, 1.84, 3.88 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. -------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------- -BM_ADDF_SCALAR 0.029 ms 0.029 ms 23951 -BM_ADDF_AutoVectorization 0.004 ms 0.004 ms 174606 +BM_ADDF_SCALAR 0.030 ms 0.030 ms 23357 +BM_ADDF_AutoVectorization 0.004 ms 0.004 ms 164695 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html b/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html index cbbc3b68..570398f8 100644 --- a/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html @@ -7,15 +7,16 @@ tr:nth-child(even){background:#fafafa} details{border:1px solid #ccc;border-radius:.4rem;padding:.6rem} summary{font-weight:600;cursor:pointer} +.err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

        deeplearning/dl-op-linalg-arithdivf-benchmark.json

        2025-05-26 22:41:02 UTC

        +

        deeplearning/dl-op-linalg-arithdivf-benchmark.json

        2025-06-01 10:22:14 UTC

        dl-op-linalg-arithdivf-benchmark.json

        - - -
        NameTime (ns)CPU (ns)Iterations
        BM_DIVF_SCALAR0.00.022508
        BM_DIVF_AutoVectorization0.00.073818
        + + +
        NameTime (ms)CPU (ms)Iterations
        BM_DIVF_SCALAR0.02930.029323,918
        BM_DIVF_AutoVectorization0.01030.010373,794
        Console output -
        2025-05-26T21:13:11+00:00
        +
        2025-06-01T09:43:53+00:00
         Running ./dl-op-linalg-arithdivf-benchmark
         Run on (24 X 5100 MHz CPU s)
         CPU Caches:
        @@ -23,13 +24,13 @@ 

        dl-op-linalg-arithdivf-benchmark.json

        L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.17, 1.92 +Load Average: 1.01, 1.82, 3.87 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. -------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------- -BM_DIVF_SCALAR 0.030 ms 0.030 ms 22508 -BM_DIVF_AutoVectorization 0.010 ms 0.010 ms 73818 +BM_DIVF_SCALAR 0.029 ms 0.029 ms 23918 +BM_DIVF_AutoVectorization 0.010 ms 0.010 ms 73794 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html b/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html index 448c1217..e3f8afb0 100644 --- a/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html @@ -7,15 +7,16 @@ tr:nth-child(even){background:#fafafa} details{border:1px solid #ccc;border-radius:.4rem;padding:.6rem} summary{font-weight:600;cursor:pointer} +.err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

        deeplearning/dl-op-linalg-arithmulf-benchmark.json

        2025-05-26 22:41:02 UTC

        +

        deeplearning/dl-op-linalg-arithmulf-benchmark.json

        2025-06-01 10:22:14 UTC

        dl-op-linalg-arithmulf-benchmark.json

        - - -
        NameTime (ns)CPU (ns)Iterations
        BM_MULF_SCALAR0.00.023392
        BM_MULF_AutoVectorization0.00.0175155
        + + +
        NameTime (ms)CPU (ms)Iterations
        BM_MULF_SCALAR0.02960.029623,548
        BM_MULF_AutoVectorization0.003990.00399146,698
        Console output -
        2025-05-26T21:13:12+00:00
        +
        2025-06-01T09:43:54+00:00
         Running ./dl-op-linalg-arithmulf-benchmark
         Run on (24 X 5100 MHz CPU s)
         CPU Caches:
        @@ -23,13 +24,13 @@ 

        dl-op-linalg-arithmulf-benchmark.json

        L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.17, 1.92 +Load Average: 1.01, 1.82, 3.87 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. -------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------- -BM_MULF_SCALAR 0.029 ms 0.029 ms 23392 -BM_MULF_AutoVectorization 0.004 ms 0.004 ms 175155 +BM_MULF_SCALAR 0.030 ms 0.030 ms 23548 +BM_MULF_AutoVectorization 0.004 ms 0.004 ms 146698 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html b/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html index e778faac..0d6c82f8 100644 --- a/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html @@ -7,15 +7,16 @@ tr:nth-child(even){background:#fafafa} details{border:1px solid #ccc;border-radius:.4rem;padding:.6rem} summary{font-weight:600;cursor:pointer} +.err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

        deeplearning/dl-op-linalg-arithnegf-benchmark.json

        2025-05-26 22:41:02 UTC

        +

        deeplearning/dl-op-linalg-arithnegf-benchmark.json

        2025-06-01 10:22:14 UTC

        dl-op-linalg-arithnegf-benchmark.json

        - - -
        NameTime (ns)CPU (ns)Iterations
        BM_NEGF_SCALAR0.00.030765
        BM_NEGF_AutoVectorization0.00.0290149
        + + +
        NameTime (ms)CPU (ms)Iterations
        BM_NEGF_SCALAR0.02280.022830,522
        BM_NEGF_AutoVectorization0.002490.00249279,150
        Console output -
        2025-05-26T21:13:15+00:00
        +
        2025-06-01T09:43:57+00:00
         Running ./dl-op-linalg-arithnegf-benchmark
         Run on (24 X 5100 MHz CPU s)
         CPU Caches:
        @@ -23,13 +24,13 @@ 

        dl-op-linalg-arithnegf-benchmark.json

        L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.17, 1.92 +Load Average: 1.01, 1.81, 3.85 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. -------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------- -BM_NEGF_SCALAR 0.023 ms 0.023 ms 30765 -BM_NEGF_AutoVectorization 0.002 ms 0.002 ms 290149 +BM_NEGF_SCALAR 0.023 ms 0.023 ms 30522 +BM_NEGF_AutoVectorization 0.002 ms 0.002 ms 279150 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html b/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html index 77a5298c..731d0f82 100644 --- a/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html @@ -7,15 +7,16 @@ tr:nth-child(even){background:#fafafa} details{border:1px solid #ccc;border-radius:.4rem;padding:.6rem} summary{font-weight:600;cursor:pointer} +.err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

        deeplearning/dl-op-linalg-arithsubf-benchmark.json

        2025-05-26 22:41:02 UTC

        +

        deeplearning/dl-op-linalg-arithsubf-benchmark.json

        2025-06-01 10:22:14 UTC

        dl-op-linalg-arithsubf-benchmark.json

        - - -
        NameTime (ns)CPU (ns)Iterations
        BM_SUBF_SCALAR0.00.023979
        BM_SUBF_AutoVectorization0.00.0175235
        + + +
        NameTime (ms)CPU (ms)Iterations
        BM_SUBF_SCALAR0.02930.029323,583
        BM_SUBF_AutoVectorization0.003990.00399175,569
        Console output -
        2025-05-26T21:13:17+00:00
        +
        2025-06-01T09:43:59+00:00
         Running ./dl-op-linalg-arithsubf-benchmark
         Run on (24 X 5100 MHz CPU s)
         CPU Caches:
        @@ -23,13 +24,13 @@ 

        dl-op-linalg-arithsubf-benchmark.json

        L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.17, 1.91 +Load Average: 1.01, 1.81, 3.85 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. -------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------- -BM_SUBF_SCALAR 0.030 ms 0.030 ms 23979 -BM_SUBF_AutoVectorization 0.004 ms 0.004 ms 175235 +BM_SUBF_SCALAR 0.029 ms 0.029 ms 23583 +BM_SUBF_AutoVectorization 0.004 ms 0.004 ms 175569 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html b/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html index 16e23a1f..4e069683 100644 --- a/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html +++ b/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html @@ -7,20 +7,21 @@ tr:nth-child(even){background:#fafafa} details{border:1px solid #ccc;border-radius:.4rem;padding:.6rem} summary{font-weight:600;cursor:pointer} +.err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

        deeplearning/dl-op-linalg-batch-matmul-benchmark.json

        2025-05-26 22:41:02 UTC

        +

        deeplearning/dl-op-linalg-batch-matmul-benchmark.json

        2025-06-01 10:22:14 UTC

        dl-op-linalg-batch-matmul-benchmark.json

        - - - - - - - -
        NameTime (ns)CPU (ns)Iterations
        DL_OPS_BATCH_MATMUL/Scalar/iterations:13551.43551.01
        DL_OPS_BATCH_MATMUL/AutoVectorization/iterations:11002.21002.11
        DL_OPS_BATCH_MATMUL/Vectorization/iterations:1191.7191.71
        DL_OPS_BATCH_MATMUL/Tile/iterations:1109.6109.61
        DL_OPS_BATCH_MATMUL/SCF/iterations:1117.3117.31
        DL_OPS_BATCH_MATMUL/BROADCAST/iterations:1356.3356.21
        DL_OPS_BATCH_MATMUL/BROADCAST_OMP/iterations:179.131.51
        + + + + + + + +
        NameTime (ms)CPU (ms)Iterations
        DL_OPS_BATCH_MATMUL/Scalar/iterations:13.53e+033.53e+031
        DL_OPS_BATCH_MATMUL/AutoVectorization/iterations:19749741
        DL_OPS_BATCH_MATMUL/Vectorization/iterations:11911911
        DL_OPS_BATCH_MATMUL/Tile/iterations:11091091
        DL_OPS_BATCH_MATMUL/SCF/iterations:11171171
        DL_OPS_BATCH_MATMUL/BROADCAST/iterations:13513511
        DL_OPS_BATCH_MATMUL/BROADCAST_OMP/iterations:180.632.11
        Console output -
        2025-05-26T21:12:58+00:00
        +
        2025-06-01T09:43:41+00:00
         Running ./dl-op-linalg-batch-matmul-benchmark
         Run on (24 X 5100 MHz CPU s)
         CPU Caches:
        @@ -28,18 +29,18 @@ 

        dl-op-linalg-batch-matmul-benchmark.json

        L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.18, 1.93 +Load Average: 1.01, 1.87, 3.91 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. --------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations --------------------------------------------------------------------------------------------- -DL_OPS_BATCH_MATMUL/Scalar/iterations:1 3551 ms 3551 ms 1 -DL_OPS_BATCH_MATMUL/AutoVectorization/iterations:1 1002 ms 1002 ms 1 -DL_OPS_BATCH_MATMUL/Vectorization/iterations:1 192 ms 192 ms 1 -DL_OPS_BATCH_MATMUL/Tile/iterations:1 110 ms 110 ms 1 +DL_OPS_BATCH_MATMUL/Scalar/iterations:1 3529 ms 3529 ms 1 +DL_OPS_BATCH_MATMUL/AutoVectorization/iterations:1 974 ms 974 ms 1 +DL_OPS_BATCH_MATMUL/Vectorization/iterations:1 191 ms 191 ms 1 +DL_OPS_BATCH_MATMUL/Tile/iterations:1 109 ms 109 ms 1 DL_OPS_BATCH_MATMUL/SCF/iterations:1 117 ms 117 ms 1 -DL_OPS_BATCH_MATMUL/BROADCAST/iterations:1 356 ms 356 ms 1 -DL_OPS_BATCH_MATMUL/BROADCAST_OMP/iterations:1 79.1 ms 31.5 ms 1 +DL_OPS_BATCH_MATMUL/BROADCAST/iterations:1 351 ms 351 ms 1 +DL_OPS_BATCH_MATMUL/BROADCAST_OMP/iterations:1 80.6 ms 32.1 ms 1 ---------- Verification ---------- Tile PASS SCF PASS diff --git a/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html b/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html index c17394a8..ccc101b1 100644 --- a/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html +++ b/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html @@ -7,15 +7,16 @@ tr:nth-child(even){background:#fafafa} details{border:1px solid #ccc;border-radius:.4rem;padding:.6rem} summary{font-weight:600;cursor:pointer} +.err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

        deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.json

        2025-05-26 22:41:02 UTC

        +

        deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.json

        2025-06-01 10:22:14 UTC

        dl-op-linalg-conv2d-nchw-fchw-benchmark.json

        - - -
        NameTime (ns)CPU (ns)Iterations
        BM_Conv2DNchwFchw_SCALAR282.0282.02
        BM_Conv2DNchwFchw_Im2col11.811.862
        + + +
        NameTime (ms)CPU (ms)Iterations
        BM_Conv2DNchwFchw_SCALAR2832832
        BM_Conv2DNchwFchw_Im2col10.110.169
        Console output -
        2025-05-26T21:12:52+00:00
        +
        2025-06-01T09:43:34+00:00
         Running ./dl-op-linalg-conv2d-nchw-fchw-benchmark
         Run on (24 X 5100 MHz CPU s)
         CPU Caches:
        @@ -23,13 +24,13 @@ 

        dl-op-linalg-conv2d-nchw-fchw-benchmark.json

        L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.18, 1.94 +Load Average: 1.02, 1.88, 3.93 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. ------------------------------------------------------------------- Benchmark Time CPU Iterations ------------------------------------------------------------------- -BM_Conv2DNchwFchw_SCALAR 282 ms 282 ms 2 -BM_Conv2DNchwFchw_Im2col 11.8 ms 11.8 ms 62 +BM_Conv2DNchwFchw_SCALAR 283 ms 283 ms 2 +BM_Conv2DNchwFchw_Im2col 10.1 ms 10.1 ms 69 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html b/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html index bc9c8cc3..c4f23e37 100644 --- a/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html +++ b/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html @@ -7,17 +7,18 @@ tr:nth-child(even){background:#fafafa} details{border:1px solid #ccc;border-radius:.4rem;padding:.6rem} summary{font-weight:600;cursor:pointer} +.err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

        deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json

        2025-05-26 22:41:02 UTC

        +

        deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json

        2025-06-01 10:22:14 UTC

        dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json

        - - - - -
        NameTime (ns)CPU (ns)Iterations
        DL_OPS_CONV_2D_NHWC_FHWC/scalar/iterations:573.773.65
        DL_OPS_CONV_2D_NHWC_FHWC/auto_vectorization/iterations:59.39.35
        DL_OPS_CONV_2D_NHWC_FHWC/vectorization/iterations:51.71.75
        DL_OPS_CONV_2D_NHWC_FHWC/vec_tile/iterations:51.71.75
        + + + + +
        NameTime (ms)CPU (ms)Iterations
        DL_OPS_CONV_2D_NHWC_FHWC/scalar/iterations:572.572.55
        DL_OPS_CONV_2D_NHWC_FHWC/auto_vectorization/iterations:59.359.355
        DL_OPS_CONV_2D_NHWC_FHWC/vectorization/iterations:51.731.735
        DL_OPS_CONV_2D_NHWC_FHWC/vec_tile/iterations:51.731.735
        Console output -
        2025-05-26T21:12:56+00:00
        +
        2025-06-01T09:43:38+00:00
         Running ./dl-op-linalg-conv2d-nhwc-fhwc-benchmark
         Run on (24 X 5100 MHz CPU s)
         CPU Caches:
        @@ -25,14 +26,14 @@ 

        dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json

        L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.18, 1.93 +Load Average: 1.01, 1.87, 3.91 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. --------------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations --------------------------------------------------------------------------------------------------- -DL_OPS_CONV_2D_NHWC_FHWC/scalar/iterations:5 73.7 ms 73.6 ms 5 -DL_OPS_CONV_2D_NHWC_FHWC/auto_vectorization/iterations:5 9.34 ms 9.34 ms 5 -DL_OPS_CONV_2D_NHWC_FHWC/vectorization/iterations:5 1.74 ms 1.74 ms 5 +DL_OPS_CONV_2D_NHWC_FHWC/scalar/iterations:5 72.5 ms 72.5 ms 5 +DL_OPS_CONV_2D_NHWC_FHWC/auto_vectorization/iterations:5 9.35 ms 9.35 ms 5 +DL_OPS_CONV_2D_NHWC_FHWC/vectorization/iterations:5 1.73 ms 1.73 ms 5 DL_OPS_CONV_2D_NHWC_FHWC/vec_tile/iterations:5 1.73 ms 1.73 ms 5 ---------- Verification ---------- auto_vectorization PASS diff --git a/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html b/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html index 756eb29a..b1f4f3e2 100644 --- a/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html @@ -7,15 +7,16 @@ tr:nth-child(even){background:#fafafa} details{border:1px solid #ccc;border-radius:.4rem;padding:.6rem} summary{font-weight:600;cursor:pointer} +.err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

        deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json

        2025-05-26 22:41:02 UTC

        +

        deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json

        2025-06-01 10:22:14 UTC

        dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json

        - - -
        NameTime (ns)CPU (ns)Iterations
        BM_CONV_2D_NHWC_HWCF_SCALAR32.632.622
        BM_CONV_2D_NHWC_HWCF_AutoVectorization6.16.1115
        + + +
        NameTime (ms)CPU (ms)Iterations
        BM_CONV_2D_NHWC_HWCF_SCALAR32.432.421
        BM_CONV_2D_NHWC_HWCF_AutoVectorization6.166.16114
        Console output -
        2025-05-26T21:12:54+00:00
        +
        2025-06-01T09:43:36+00:00
         Running ./dl-op-linalg-conv2d-nhwc-hwcf-benchmark
         Run on (24 X 5100 MHz CPU s)
         CPU Caches:
        @@ -23,13 +24,13 @@ 

        dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json

        L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.18, 1.94 +Load Average: 1.02, 1.88, 3.93 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. --------------------------------------------------------------------------------- Benchmark Time CPU Iterations --------------------------------------------------------------------------------- -BM_CONV_2D_NHWC_HWCF_SCALAR 32.6 ms 32.6 ms 22 -BM_CONV_2D_NHWC_HWCF_AutoVectorization 6.07 ms 6.07 ms 115 +BM_CONV_2D_NHWC_HWCF_SCALAR 32.4 ms 32.4 ms 21 +BM_CONV_2D_NHWC_HWCF_AutoVectorization 6.16 ms 6.16 ms 114 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html b/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html index 7f08f18d..96e9e457 100644 --- a/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html +++ b/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html @@ -7,16 +7,17 @@ tr:nth-child(even){background:#fafafa} details{border:1px solid #ccc;border-radius:.4rem;padding:.6rem} summary{font-weight:600;cursor:pointer} +.err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

        deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json

        2025-05-26 22:41:02 UTC

        +

        deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json

        2025-06-01 10:22:14 UTC

        dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json

        - - - -
        NameTime (ns)CPU (ns)Iterations
        DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/scalar/iterations:54.84.85
        DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/auto_vectorization/iterations:51.71.75
        DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/vectorization/iterations:50.10.15
        + + + +
        NameTime (ms)CPU (ms)Iterations
        DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/scalar/iterations:57.277.275
        DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/auto_vectorization/iterations:51.681.685
        DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/vectorization/iterations:50.1270.1275
        Console output -
        2025-05-26T21:12:56+00:00
        +
        2025-06-01T09:43:38+00:00
         Running ./dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark
         Run on (24 X 5100 MHz CPU s)
         CPU Caches:
        @@ -24,14 +25,14 @@ 

        dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json

        L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.18, 1.93 +Load Average: 1.01, 1.87, 3.91 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. ------------------------------------------------------------------------------------------------------------ Benchmark Time CPU Iterations ------------------------------------------------------------------------------------------------------------ -DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/scalar/iterations:5 4.84 ms 4.84 ms 5 +DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/scalar/iterations:5 7.27 ms 7.27 ms 5 DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/auto_vectorization/iterations:5 1.68 ms 1.68 ms 5 -DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/vectorization/iterations:5 0.120 ms 0.120 ms 5 +DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/vectorization/iterations:5 0.127 ms 0.127 ms 5 ---------- Verification ---------- auto_vectorization PASS vectorization PASS diff --git a/site/deeplearning/dl-op-linalg-mathexp-benchmark.html b/site/deeplearning/dl-op-linalg-mathexp-benchmark.html index 341c32bd..b322f413 100644 --- a/site/deeplearning/dl-op-linalg-mathexp-benchmark.html +++ b/site/deeplearning/dl-op-linalg-mathexp-benchmark.html @@ -7,15 +7,16 @@ tr:nth-child(even){background:#fafafa} details{border:1px solid #ccc;border-radius:.4rem;padding:.6rem} summary{font-weight:600;cursor:pointer} +.err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

        deeplearning/dl-op-linalg-mathexp-benchmark.json

        2025-05-26 22:41:02 UTC

        +

        deeplearning/dl-op-linalg-mathexp-benchmark.json

        2025-06-01 10:22:14 UTC

        dl-op-linalg-mathexp-benchmark.json

        - - -
        NameTime (ns)CPU (ns)Iterations
        BM_EXP_SCALAR0.00.015289
        BM_EXP_AutoVectorization0.00.022195
        + + +
        NameTime (ms)CPU (ms)Iterations
        BM_EXP_SCALAR0.04560.045615,072
        BM_EXP_AutoVectorization0.03160.031622,245
        Console output -
        2025-05-26T21:13:22+00:00
        +
        2025-06-01T09:44:04+00:00
         Running ./dl-op-linalg-mathexp-benchmark
         Run on (24 X 5100 MHz CPU s)
         CPU Caches:
        @@ -23,13 +24,13 @@ 

        dl-op-linalg-mathexp-benchmark.json

        L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.08, 1.18, 1.91 +Load Average: 1.01, 1.80, 3.84 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. ------------------------------------------------------------------- Benchmark Time CPU Iterations ------------------------------------------------------------------- -BM_EXP_SCALAR 0.046 ms 0.046 ms 15289 -BM_EXP_AutoVectorization 0.032 ms 0.032 ms 22195 +BM_EXP_SCALAR 0.046 ms 0.046 ms 15072 +BM_EXP_AutoVectorization 0.032 ms 0.032 ms 22245 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html b/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html index 652a8f96..7fb4b696 100644 --- a/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html +++ b/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html @@ -7,15 +7,16 @@ tr:nth-child(even){background:#fafafa} details{border:1px solid #ccc;border-radius:.4rem;padding:.6rem} summary{font-weight:600;cursor:pointer} +.err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

        deeplearning/dl-op-linalg-mathfpow-benchmark.json

        2025-05-26 22:41:02 UTC

        +

        deeplearning/dl-op-linalg-mathfpow-benchmark.json

        2025-06-01 10:22:14 UTC

        dl-op-linalg-mathfpow-benchmark.json

        - - -
        NameTime (ns)CPU (ns)Iterations
        BM_FPOW_SCALAR0.10.18096
        BM_FPOW_AutoVectorization0.10.112303
        + + +
        NameTime (ms)CPU (ms)Iterations
        BM_FPOW_SCALAR0.08520.08528,120
        BM_FPOW_AutoVectorization0.05690.056912,142
        Console output -
        2025-05-26T21:13:19+00:00
        +
        2025-06-01T09:44:01+00:00
         Running ./dl-op-linalg-mathfpow-benchmark
         Run on (24 X 5100 MHz CPU s)
         CPU Caches:
        @@ -23,13 +24,13 @@ 

        dl-op-linalg-mathfpow-benchmark.json

        L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.17, 1.91 +Load Average: 1.01, 1.81, 3.85 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. -------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------- -BM_FPOW_SCALAR 0.083 ms 0.083 ms 8096 -BM_FPOW_AutoVectorization 0.057 ms 0.057 ms 12303 +BM_FPOW_SCALAR 0.085 ms 0.085 ms 8120 +BM_FPOW_AutoVectorization 0.057 ms 0.057 ms 12142 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html b/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html index 8c7d6d72..2655a6f8 100644 --- a/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html +++ b/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html @@ -7,15 +7,16 @@ tr:nth-child(even){background:#fafafa} details{border:1px solid #ccc;border-radius:.4rem;padding:.6rem} summary{font-weight:600;cursor:pointer} +.err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

        deeplearning/dl-op-linalg-mathrsqrt-benchmark.json

        2025-05-26 22:41:02 UTC

        +

        deeplearning/dl-op-linalg-mathrsqrt-benchmark.json

        2025-06-01 10:22:14 UTC

        dl-op-linalg-mathrsqrt-benchmark.json

        - - -
        NameTime (ns)CPU (ns)Iterations
        BM_RSQRT_SCALAR0.10.19407
        BM_RSQRT_AutoVectorization0.00.0161010
        + + +
        NameTime (ms)CPU (ms)Iterations
        BM_RSQRT_SCALAR0.07280.07289,624
        BM_RSQRT_AutoVectorization0.004340.00434160,866
        Console output -
        2025-05-26T21:13:20+00:00
        +
        2025-06-01T09:44:02+00:00
         Running ./dl-op-linalg-mathrsqrt-benchmark
         Run on (24 X 5100 MHz CPU s)
         CPU Caches:
        @@ -23,13 +24,13 @@ 

        dl-op-linalg-mathrsqrt-benchmark.json

        L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.08, 1.18, 1.91 +Load Average: 1.01, 1.80, 3.84 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. --------------------------------------------------------------------- Benchmark Time CPU Iterations --------------------------------------------------------------------- -BM_RSQRT_SCALAR 0.073 ms 0.073 ms 9407 -BM_RSQRT_AutoVectorization 0.004 ms 0.004 ms 161010 +BM_RSQRT_SCALAR 0.073 ms 0.073 ms 9624 +BM_RSQRT_AutoVectorization 0.004 ms 0.004 ms 160866 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/site/deeplearning/dl-op-linalg-matmul-benchmark.html b/site/deeplearning/dl-op-linalg-matmul-benchmark.html index 44d5a587..35a9e0c9 100644 --- a/site/deeplearning/dl-op-linalg-matmul-benchmark.html +++ b/site/deeplearning/dl-op-linalg-matmul-benchmark.html @@ -7,18 +7,19 @@ tr:nth-child(even){background:#fafafa} details{border:1px solid #ccc;border-radius:.4rem;padding:.6rem} summary{font-weight:600;cursor:pointer} +.err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

        deeplearning/dl-op-linalg-matmul-benchmark.json

        2025-05-26 22:41:02 UTC

        +

        deeplearning/dl-op-linalg-matmul-benchmark.json

        2025-06-01 10:22:14 UTC

        dl-op-linalg-matmul-benchmark.json

        - - - - - -
        NameTime (ns)CPU (ns)Iterations
        DL_OPS_MATMUL/scalar_O0/iterations:14219.84219.71
        DL_OPS_MATMUL/scalar_O3/iterations:13393.83393.61
        DL_OPS_MATMUL/tile/iterations:1117.1117.11
        DL_OPS_MATMUL/vec/iterations:159.959.91
        DL_OPS_MATMUL/vec_omp/iterations:130.59.81
        + + + + + +
        NameTime (ms)CPU (ms)Iterations
        DL_OPS_MATMUL/scalar_O0/iterations:14.1e+034.1e+031
        DL_OPS_MATMUL/scalar_O3/iterations:13.12e+033.12e+031
        DL_OPS_MATMUL/tile/iterations:11101101
        DL_OPS_MATMUL/vec/iterations:159591
        DL_OPS_MATMUL/vec_omp/iterations:128.8111
        Console output -
        2025-05-26T21:12:40+00:00
        +
        2025-06-01T09:43:23+00:00
         Running ./dl-op-linalg-matmul-benchmark
         Run on (24 X 5100 MHz CPU s)
         CPU Caches:
        @@ -26,16 +27,16 @@ 

        dl-op-linalg-matmul-benchmark.json

        L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.19, 1.95 +Load Average: 1.02, 1.91, 3.96 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. ------------------------------------------------------------------------------- Benchmark Time CPU Iterations ------------------------------------------------------------------------------- -DL_OPS_MATMUL/scalar_O0/iterations:1 4220 ms 4220 ms 1 -DL_OPS_MATMUL/scalar_O3/iterations:1 3394 ms 3394 ms 1 -DL_OPS_MATMUL/tile/iterations:1 117 ms 117 ms 1 -DL_OPS_MATMUL/vec/iterations:1 59.9 ms 59.9 ms 1 -DL_OPS_MATMUL/vec_omp/iterations:1 30.5 ms 9.79 ms 1 +DL_OPS_MATMUL/scalar_O0/iterations:1 4096 ms 4096 ms 1 +DL_OPS_MATMUL/scalar_O3/iterations:1 3124 ms 3124 ms 1 +DL_OPS_MATMUL/tile/iterations:1 110 ms 110 ms 1 +DL_OPS_MATMUL/vec/iterations:1 59.0 ms 59.0 ms 1 +DL_OPS_MATMUL/vec_omp/iterations:1 28.8 ms 11.0 ms 1 ---------- Verification ---------- tile PASS vec PASS diff --git a/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html b/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html index a958f85d..3c92e8df 100644 --- a/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html +++ b/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html @@ -7,15 +7,16 @@ tr:nth-child(even){background:#fafafa} details{border:1px solid #ccc;border-radius:.4rem;padding:.6rem} summary{font-weight:600;cursor:pointer} +.err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

        deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.json

        2025-05-26 22:41:02 UTC

        +

        deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.json

        2025-06-01 10:22:14 UTC

        dl-op-linalg-pooling-nhwc-sum-benchmark.json

        - - -
        NameTime (ns)CPU (ns)Iterations
        BM_POOLING_NHWC_SUM_SCALAR0.20.22972
        BM_POOLING_NHWC_SUM_AutoVectorization0.00.016865
        + + +
        NameTime (ms)CPU (ms)Iterations
        BM_POOLING_NHWC_SUM_SCALAR0.2330.2332,993
        BM_POOLING_NHWC_SUM_AutoVectorization0.04140.041416,954
        Console output -
        2025-05-26T21:12:56+00:00
        +
        2025-06-01T09:43:38+00:00
         Running ./dl-op-linalg-pooling-nhwc-sum-benchmark
         Run on (24 X 5100 MHz CPU s)
         CPU Caches:
        @@ -23,13 +24,13 @@ 

        dl-op-linalg-pooling-nhwc-sum-benchmark.json

        L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.18, 1.93 +Load Average: 1.01, 1.87, 3.91 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. -------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------- -BM_POOLING_NHWC_SUM_SCALAR 0.234 ms 0.234 ms 2972 -BM_POOLING_NHWC_SUM_AutoVectorization 0.041 ms 0.041 ms 16865 +BM_POOLING_NHWC_SUM_SCALAR 0.233 ms 0.233 ms 2993 +BM_POOLING_NHWC_SUM_AutoVectorization 0.041 ms 0.041 ms 16954 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html b/site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html new file mode 100644 index 00000000..1b937e8f --- /dev/null +++ b/site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html @@ -0,0 +1,26 @@ + + + +

        deeplearning/dl-op-linalg-reduceaddf-benchmark.json

        2025-06-01 10:22:14 UTC

        +
        ⚠ FAILED: JSON parse error: Expecting value
        +
        Console output +
        2025-06-01T09:44:07+00:00
        +Running ./dl-op-linalg-reduceaddf-benchmark
        +Run on (24 X 5100 MHz CPU s)
        +CPU Caches:
        +  L1 Data 48 KiB (x12)
        +  L1 Instruction 32 KiB (x12)
        +  L2 Unified 1280 KiB (x12)
        +  L3 Unified 30720 KiB (x1)
        +Load Average: 1.01, 1.78, 3.82
        +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        +
        \ No newline at end of file diff --git a/site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html b/site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html new file mode 100644 index 00000000..24055ad2 --- /dev/null +++ b/site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html @@ -0,0 +1,26 @@ + + + +

        deeplearning/dl-op-linalg-reducemaxf-benchmark.json

        2025-06-01 10:22:14 UTC

        +
        ⚠ FAILED: JSON parse error: Expecting value
        +
        Console output +
        2025-06-01T09:44:07+00:00
        +Running ./dl-op-linalg-reducemaxf-benchmark
        +Run on (24 X 5100 MHz CPU s)
        +CPU Caches:
        +  L1 Data 48 KiB (x12)
        +  L1 Instruction 32 KiB (x12)
        +  L2 Unified 1280 KiB (x12)
        +  L3 Unified 30720 KiB (x1)
        +Load Average: 1.01, 1.78, 3.82
        +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        +
        \ No newline at end of file diff --git a/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html b/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html index cbb9f3fb..398f1399 100644 --- a/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html +++ b/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html @@ -7,15 +7,16 @@ tr:nth-child(even){background:#fafafa} details{border:1px solid #ccc;border-radius:.4rem;padding:.6rem} summary{font-weight:600;cursor:pointer} +.err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

        deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.json

        2025-05-26 22:41:02 UTC

        +

        deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.json

        2025-06-01 10:22:14 UTC

        dl-op-linalg-softmax-exp-sum-div-benchmark.json

        - - -
        NameTime (ns)CPU (ns)Iterations
        BM_SOFTMAXEXPSUMDIV_SCALAR0.00.0125501
        BM_SOFTMAXEXPSUMDIV_AutoVectorization0.00.0181845
        + + +
        NameTime (ms)CPU (ms)Iterations
        BM_SOFTMAXEXPSUMDIV_SCALAR0.005590.00559123,186
        BM_SOFTMAXEXPSUMDIV_AutoVectorization0.003850.00385182,176
        Console output -
        2025-05-26T21:13:25+00:00
        +
        2025-06-01T09:44:07+00:00
         Running ./dl-op-linalg-softmax-exp-sum-div-benchmark
         Run on (24 X 5100 MHz CPU s)
         CPU Caches:
        @@ -23,13 +24,13 @@ 

        dl-op-linalg-softmax-exp-sum-div-benchmark.json

        L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.08, 1.18, 1.91 +Load Average: 1.01, 1.78, 3.82 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. -------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------- -BM_SOFTMAXEXPSUMDIV_SCALAR 0.006 ms 0.006 ms 125501 -BM_SOFTMAXEXPSUMDIV_AutoVectorization 0.004 ms 0.004 ms 181845 +BM_SOFTMAXEXPSUMDIV_SCALAR 0.006 ms 0.006 ms 123186 +BM_SOFTMAXEXPSUMDIV_AutoVectorization 0.004 ms 0.004 ms 182176 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html b/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html index 29d351a3..950f277f 100644 --- a/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html +++ b/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html @@ -7,17 +7,18 @@ tr:nth-child(even){background:#fafafa} details{border:1px solid #ccc;border-radius:.4rem;padding:.6rem} summary{font-weight:600;cursor:pointer} +.err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

        deeplearning/dl-op-matmul-transpose-b-benchmark.json

        2025-05-26 22:41:02 UTC

        +

        deeplearning/dl-op-matmul-transpose-b-benchmark.json

        2025-06-01 10:22:14 UTC

        dl-op-matmul-transpose-b-benchmark.json

        - - - - -
        NameTime (ns)CPU (ns)Iterations
        DL_OPS_MATMUL_TRANSPOSE_B/scalar_O0/iterations:51265.31263.65
        DL_OPS_MATMUL_TRANSPOSE_B/scalar_O3/iterations:5326.8326.85
        DL_OPS_MATMUL_TRANSPOSE_B/scalar_O3_omp/iterations:530.318.85
        DL_OPS_MATMUL_TRANSPOSE_B/vec/iterations:583.683.65
        + + + + +
        NameTime (ms)CPU (ms)Iterations
        DL_OPS_MATMUL_TRANSPOSE_B/scalar_O0/iterations:51.05e+031.04e+035
        DL_OPS_MATMUL_TRANSPOSE_B/scalar_O3/iterations:52772775
        DL_OPS_MATMUL_TRANSPOSE_B/scalar_O3_omp/iterations:530.319.45
        DL_OPS_MATMUL_TRANSPOSE_B/vec/iterations:584.984.95
        Console output -
        2025-05-26T21:13:28+00:00
        +
        2025-06-01T09:44:10+00:00
         Running ./dl-op-matmul-transpose-b-benchmark
         Run on (24 X 5100 MHz CPU s)
         CPU Caches:
        @@ -25,15 +26,15 @@ 

        dl-op-matmul-transpose-b-benchmark.json

        L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.07, 1.18, 1.91 +Load Average: 1.01, 1.78, 3.82 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. ----------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations ----------------------------------------------------------------------------------------------- -DL_OPS_MATMUL_TRANSPOSE_B/scalar_O0/iterations:5 1265 ms 1264 ms 5 -DL_OPS_MATMUL_TRANSPOSE_B/scalar_O3/iterations:5 327 ms 327 ms 5 -DL_OPS_MATMUL_TRANSPOSE_B/scalar_O3_omp/iterations:5 30.3 ms 18.8 ms 5 -DL_OPS_MATMUL_TRANSPOSE_B/vec/iterations:5 83.6 ms 83.6 ms 5 +DL_OPS_MATMUL_TRANSPOSE_B/scalar_O0/iterations:5 1045 ms 1044 ms 5 +DL_OPS_MATMUL_TRANSPOSE_B/scalar_O3/iterations:5 277 ms 277 ms 5 +DL_OPS_MATMUL_TRANSPOSE_B/scalar_O3_omp/iterations:5 30.3 ms 19.4 ms 5 +DL_OPS_MATMUL_TRANSPOSE_B/vec/iterations:5 84.9 ms 84.9 ms 5 ---------- Verification ---------- scalar_O3 PASS scalar_O3_omp PASS diff --git a/site/deeplearning/dl-op-tosa-transpose-benchmark.html b/site/deeplearning/dl-op-tosa-transpose-benchmark.html index 6a6cea80..5dd32b66 100644 --- a/site/deeplearning/dl-op-tosa-transpose-benchmark.html +++ b/site/deeplearning/dl-op-tosa-transpose-benchmark.html @@ -7,15 +7,16 @@ tr:nth-child(even){background:#fafafa} details{border:1px solid #ccc;border-radius:.4rem;padding:.6rem} summary{font-weight:600;cursor:pointer} +.err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

        deeplearning/dl-op-tosa-transpose-benchmark.json

        2025-05-26 22:41:02 UTC

        +

        deeplearning/dl-op-tosa-transpose-benchmark.json

        2025-06-01 10:22:14 UTC

        dl-op-tosa-transpose-benchmark.json

        - - -
        NameTime (ns)CPU (ns)Iterations
        DL_OPS_TRANSPOSE_2D/scalar_O0/iterations:529.720.95
        DL_OPS_TRANSPOSE_2D/scalar_O3/iterations:521.920.45
        + + +
        NameTime (ms)CPU (ms)Iterations
        DL_OPS_TRANSPOSE_2D/scalar_O0/iterations:524.917.75
        DL_OPS_TRANSPOSE_2D/scalar_O3/iterations:517.915.35
        Console output -
        2025-05-26T21:13:27+00:00
        +
        2025-06-01T09:44:09+00:00
         Running ./dl-op-tosa-transpose-benchmark
         Run on (24 X 5100 MHz CPU s)
         CPU Caches:
        @@ -23,13 +24,13 @@ 

        dl-op-tosa-transpose-benchmark.json

        L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.07, 1.18, 1.91 +Load Average: 1.01, 1.78, 3.82 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. ------------------------------------------------------------------------------------- Benchmark Time CPU Iterations ------------------------------------------------------------------------------------- -DL_OPS_TRANSPOSE_2D/scalar_O0/iterations:5 29.7 ms 20.9 ms 5 -DL_OPS_TRANSPOSE_2D/scalar_O3/iterations:5 21.9 ms 20.4 ms 5 +DL_OPS_TRANSPOSE_2D/scalar_O0/iterations:5 24.9 ms 17.7 ms 5 +DL_OPS_TRANSPOSE_2D/scalar_O3/iterations:5 17.9 ms 15.3 ms 5 ---------- Verification ---------- scalar_O3 PASS
        \ No newline at end of file diff --git a/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 7e4128c0..61c39a76 100644 --- a/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -7,30 +7,89 @@ tr:nth-child(even){background:#fafafa} details{border:1px solid #ccc;border-radius:.4rem;padding:.6rem} summary{font-weight:600;cursor:pointer} +.err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

        imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:41:02 UTC

        +

        imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-06-01 10:22:14 UTC

        AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        - - - - - - - - - - - - - - - - - - - - - - -
        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/14.94.9144
        MLIR_Conv2D/17.47.495
        Buddy_Conv2D/10.40.41663
        Buddy_Corr2D_Constant_Padding/11.11.1651
        OpenCV_Filter2D_Constant_Padding/11.91.9367
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.10.14748
        Buddy_Resize2D_Bilinear_Interpolation/10.30.32632
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.00.0102984
        OpenCV_Resize2D_Bilinear_Interpolation/10.00.048927
        Buddy_Erosion2D_Constant_Padding/10.20.23155
        Buddy_Dilation2D_Constant_Padding/10.20.23149
        Buddy_Opening2D_Constant_Padding/10.40.42068
        Buddy_Closing2D_Constant_Padding/10.40.41957
        Buddy_TopHat2D_Constant_Padding/10.90.9731
        Buddy_BottomHat2D_Constant_Padding/10.90.9729
        OpenCV_Erode2D_Constant_Padding/10.10.15005
        OpenCV_Opening2D_Constant_Padding/10.20.23076
        OpenCV_Closing2D_Constant_Padding/10.20.23193
        OpenCV_TopHat2D_Constant_Padding/10.30.32760
        OpenCV_BottomHat2D_Constant_Padding/10.30.32736
        OpenCV_MorphGrad2D_Constant_Padding/10.30.32716
        OpenCV_Dilate2D_Constant_Padding/10.10.14963
        \ No newline at end of file + + + + + + + + + + + + + + + + + + + + + + +
        NameTime (ms)CPU (ms)Iterations
        Eigen_Convolve2D/14.884.88143
        MLIR_Conv2D/17.197.1997
        Buddy_Conv2D/10.4180.4181,675
        Buddy_Corr2D_Constant_Padding/11.061.06666
        OpenCV_Filter2D_Constant_Padding/11.861.86376
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,817
        Buddy_Resize2D_Bilinear_Interpolation/10.260.262,689
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006650.00665105,253
        OpenCV_Resize2D_Bilinear_Interpolation/10.0140.01449,956
        Buddy_Erosion2D_Constant_Padding/10.2150.2153,274
        Buddy_Dilation2D_Constant_Padding/10.2140.2143,255
        Buddy_Opening2D_Constant_Padding/10.3140.3142,261
        Buddy_Closing2D_Constant_Padding/10.3180.3182,259
        Buddy_TopHat2D_Constant_Padding/10.810.81835
        Buddy_BottomHat2D_Constant_Padding/10.7870.787848
        OpenCV_Erode2D_Constant_Padding/10.1370.1375,119
        OpenCV_Opening2D_Constant_Padding/10.2260.2263,092
        OpenCV_Closing2D_Constant_Padding/10.2270.2273,082
        OpenCV_TopHat2D_Constant_Padding/10.260.262,689
        OpenCV_BottomHat2D_Constant_Padding/10.2590.2592,705
        OpenCV_MorphGrad2D_Constant_Padding/10.2520.2522,779
        OpenCV_Dilate2D_Constant_Padding/10.1370.1375,113
        +
        Console output +
        2025-06-01T10:09:28+00:00
        +Running ./bin/image-processing-benchmark
        +Run on (24 X 5100 MHz CPU s)
        +CPU Caches:
        +  L1 Data 48 KiB (x12)
        +  L1 Instruction 32 KiB (x12)
        +  L2 Unified 1280 KiB (x12)
        +  L3 Unified 30720 KiB (x1)
        +Load Average: 1.00, 1.13, 1.85
        +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        +--------------------------------------------------------------------------------------------
        +Benchmark                                                  Time             CPU   Iterations
        +--------------------------------------------------------------------------------------------
        +Eigen_Convolve2D/1                                      4.88 ms         4.88 ms          143
        +MLIR_Conv2D/1                                           7.19 ms         7.19 ms           97
        +Buddy_Conv2D/1                                         0.418 ms        0.418 ms         1675
        +Buddy_Corr2D_Constant_Padding/1                         1.06 ms         1.06 ms          666
        +OpenCV_Filter2D_Constant_Padding/1                      1.86 ms         1.86 ms          376
        +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4817
        +Buddy_Resize2D_Bilinear_Interpolation/1                0.260 ms        0.260 ms         2689
        +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       105253
        +OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49956
        +Buddy_Erosion2D_Constant_Padding/1                     0.215 ms        0.215 ms         3274
        +Buddy_Dilation2D_Constant_Padding/1                    0.214 ms        0.214 ms         3255
        +Buddy_Opening2D_Constant_Padding/1                     0.314 ms        0.314 ms         2261
        +Buddy_Closing2D_Constant_Padding/1                     0.318 ms        0.318 ms         2259
        +Buddy_TopHat2D_Constant_Padding/1                      0.810 ms        0.810 ms          835
        +Buddy_BottomHat2D_Constant_Padding/1                   0.787 ms        0.787 ms          848
        +OpenCV_Erode2D_Constant_Padding/1                      0.137 ms        0.137 ms         5119
        +OpenCV_Opening2D_Constant_Padding/1                    0.226 ms        0.226 ms         3092
        +OpenCV_Closing2D_Constant_Padding/1                    0.227 ms        0.227 ms         3082
        +OpenCV_TopHat2D_Constant_Padding/1                     0.260 ms        0.260 ms         2689
        +OpenCV_BottomHat2D_Constant_Padding/1                  0.259 ms        0.259 ms         2705
        +OpenCV_MorphGrad2D_Constant_Padding/1                  0.252 ms        0.252 ms         2779
        +OpenCV_Dilate2D_Constant_Padding/1                     0.137 ms        0.137 ms         5113
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +
        \ No newline at end of file diff --git a/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html new file mode 100644 index 00000000..097b55cc --- /dev/null +++ b/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -0,0 +1,95 @@ + + + +

        imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-06-01 10:22:14 UTC

        +

        AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        + + + + + + + + + + + + + + + + + + + + + + +
        NameTime (ms)CPU (ms)Iterations
        Eigen_Convolve2D/14.884.88143
        MLIR_Conv2D/17.187.1897
        Buddy_Conv2D/10.4180.4181,675
        Buddy_Corr2D_Constant_Padding/11.061.06662
        OpenCV_Filter2D_Constant_Padding/11.861.86376
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,857
        Buddy_Resize2D_Bilinear_Interpolation/10.260.262,693
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006660.00666105,155
        OpenCV_Resize2D_Bilinear_Interpolation/10.01410.014149,833
        Buddy_Erosion2D_Constant_Padding/10.2150.2153,267
        Buddy_Dilation2D_Constant_Padding/10.2140.2143,262
        Buddy_Opening2D_Constant_Padding/10.3090.3092,259
        Buddy_Closing2D_Constant_Padding/10.3110.3112,232
        Buddy_TopHat2D_Constant_Padding/10.8010.801854
        Buddy_BottomHat2D_Constant_Padding/10.7950.795833
        OpenCV_Erode2D_Constant_Padding/10.1360.1365,118
        OpenCV_Opening2D_Constant_Padding/10.2240.2243,117
        OpenCV_Closing2D_Constant_Padding/10.2260.2263,087
        OpenCV_TopHat2D_Constant_Padding/10.2560.2562,731
        OpenCV_BottomHat2D_Constant_Padding/10.260.262,693
        OpenCV_MorphGrad2D_Constant_Padding/10.2490.2492,799
        OpenCV_Dilate2D_Constant_Padding/10.1390.1395,051
        +
        Console output +
        2025-06-01T10:09:52+00:00
        +Running ./bin/image-processing-benchmark
        +Run on (24 X 5100 MHz CPU s)
        +CPU Caches:
        +  L1 Data 48 KiB (x12)
        +  L1 Instruction 32 KiB (x12)
        +  L2 Unified 1280 KiB (x12)
        +  L3 Unified 30720 KiB (x1)
        +Load Average: 1.00, 1.12, 1.83
        +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        +--------------------------------------------------------------------------------------------
        +Benchmark                                                  Time             CPU   Iterations
        +--------------------------------------------------------------------------------------------
        +Eigen_Convolve2D/1                                      4.88 ms         4.88 ms          143
        +MLIR_Conv2D/1                                           7.18 ms         7.18 ms           97
        +Buddy_Conv2D/1                                         0.418 ms        0.418 ms         1675
        +Buddy_Corr2D_Constant_Padding/1                         1.06 ms         1.06 ms          662
        +OpenCV_Filter2D_Constant_Padding/1                      1.86 ms         1.86 ms          376
        +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4857
        +Buddy_Resize2D_Bilinear_Interpolation/1                0.260 ms        0.260 ms         2693
        +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       105155
        +OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49833
        +Buddy_Erosion2D_Constant_Padding/1                     0.215 ms        0.215 ms         3267
        +Buddy_Dilation2D_Constant_Padding/1                    0.214 ms        0.214 ms         3262
        +Buddy_Opening2D_Constant_Padding/1                     0.309 ms        0.309 ms         2259
        +Buddy_Closing2D_Constant_Padding/1                     0.311 ms        0.311 ms         2232
        +Buddy_TopHat2D_Constant_Padding/1                      0.801 ms        0.801 ms          854
        +Buddy_BottomHat2D_Constant_Padding/1                   0.795 ms        0.795 ms          833
        +OpenCV_Erode2D_Constant_Padding/1                      0.136 ms        0.136 ms         5118
        +OpenCV_Opening2D_Constant_Padding/1                    0.224 ms        0.224 ms         3117
        +OpenCV_Closing2D_Constant_Padding/1                    0.226 ms        0.226 ms         3087
        +OpenCV_TopHat2D_Constant_Padding/1                     0.256 ms        0.256 ms         2731
        +OpenCV_BottomHat2D_Constant_Padding/1                  0.260 ms        0.260 ms         2693
        +OpenCV_MorphGrad2D_Constant_Padding/1                  0.249 ms        0.249 ms         2799
        +OpenCV_Dilate2D_Constant_Padding/1                     0.139 ms        0.139 ms         5051
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +
        \ No newline at end of file diff --git a/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 931ee0a4..8b379ccc 100644 --- a/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -7,30 +7,89 @@ tr:nth-child(even){background:#fafafa} details{border:1px solid #ccc;border-radius:.4rem;padding:.6rem} summary{font-weight:600;cursor:pointer} +.err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

        imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:41:02 UTC

        +

        imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-06-01 10:22:14 UTC

        AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        - - - - - - - - - - - - - - - - - - - - - - -
        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/111.811.859
        MLIR_Conv2D/129.929.924
        Buddy_Conv2D/11.01.0751
        Buddy_Corr2D_Constant_Padding/11.81.8391
        OpenCV_Filter2D_Constant_Padding/12.72.7255
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.10.14778
        Buddy_Resize2D_Bilinear_Interpolation/10.30.32630
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.00.0103033
        OpenCV_Resize2D_Bilinear_Interpolation/10.00.048860
        Buddy_Erosion2D_Constant_Padding/10.20.23134
        Buddy_Dilation2D_Constant_Padding/10.20.22971
        Buddy_Opening2D_Constant_Padding/10.30.31881
        Buddy_Closing2D_Constant_Padding/10.30.32009
        Buddy_TopHat2D_Constant_Padding/10.90.9714
        Buddy_BottomHat2D_Constant_Padding/10.90.9723
        OpenCV_Erode2D_Constant_Padding/10.10.14965
        OpenCV_Opening2D_Constant_Padding/10.20.23088
        OpenCV_Closing2D_Constant_Padding/10.20.23129
        OpenCV_TopHat2D_Constant_Padding/10.30.32625
        OpenCV_BottomHat2D_Constant_Padding/10.30.32625
        OpenCV_MorphGrad2D_Constant_Padding/10.30.32692
        OpenCV_Dilate2D_Constant_Padding/10.10.15053
        \ No newline at end of file + + + + + + + + + + + + + + + + + + + + + + +
        NameTime (ms)CPU (ms)Iterations
        Eigen_Convolve2D/111.511.561
        MLIR_Conv2D/1292925
        Buddy_Conv2D/11.111.11632
        Buddy_Corr2D_Constant_Padding/11.741.74400
        OpenCV_Filter2D_Constant_Padding/12.682.68262
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1420.1424,855
        Buddy_Resize2D_Bilinear_Interpolation/10.2610.2612,692
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006630.00663105,416
        OpenCV_Resize2D_Bilinear_Interpolation/10.0140.01449,870
        Buddy_Erosion2D_Constant_Padding/10.2140.2143,258
        Buddy_Dilation2D_Constant_Padding/10.2140.2143,246
        Buddy_Opening2D_Constant_Padding/10.320.322,260
        Buddy_Closing2D_Constant_Padding/10.310.312,223
        Buddy_TopHat2D_Constant_Padding/10.8060.806827
        Buddy_BottomHat2D_Constant_Padding/10.820.82852
        OpenCV_Erode2D_Constant_Padding/10.1370.1375,096
        OpenCV_Opening2D_Constant_Padding/10.2230.2233,136
        OpenCV_Closing2D_Constant_Padding/10.2270.2273,085
        OpenCV_TopHat2D_Constant_Padding/10.260.262,693
        OpenCV_BottomHat2D_Constant_Padding/10.260.262,686
        OpenCV_MorphGrad2D_Constant_Padding/10.2540.2542,746
        OpenCV_Dilate2D_Constant_Padding/10.1340.1345,208
        +
        Console output +
        2025-06-01T10:10:17+00:00
        +Running ./bin/image-processing-benchmark
        +Run on (24 X 5100 MHz CPU s)
        +CPU Caches:
        +  L1 Data 48 KiB (x12)
        +  L1 Instruction 32 KiB (x12)
        +  L2 Unified 1280 KiB (x12)
        +  L3 Unified 30720 KiB (x1)
        +Load Average: 1.00, 1.11, 1.81
        +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        +--------------------------------------------------------------------------------------------
        +Benchmark                                                  Time             CPU   Iterations
        +--------------------------------------------------------------------------------------------
        +Eigen_Convolve2D/1                                      11.5 ms         11.5 ms           61
        +MLIR_Conv2D/1                                           29.0 ms         29.0 ms           25
        +Buddy_Conv2D/1                                          1.11 ms         1.11 ms          632
        +Buddy_Corr2D_Constant_Padding/1                         1.74 ms         1.74 ms          400
        +OpenCV_Filter2D_Constant_Padding/1                      2.68 ms         2.68 ms          262
        +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.142 ms        0.142 ms         4855
        +Buddy_Resize2D_Bilinear_Interpolation/1                0.261 ms        0.261 ms         2692
        +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       105416
        +OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49870
        +Buddy_Erosion2D_Constant_Padding/1                     0.214 ms        0.214 ms         3258
        +Buddy_Dilation2D_Constant_Padding/1                    0.214 ms        0.214 ms         3246
        +Buddy_Opening2D_Constant_Padding/1                     0.320 ms        0.320 ms         2260
        +Buddy_Closing2D_Constant_Padding/1                     0.310 ms        0.310 ms         2223
        +Buddy_TopHat2D_Constant_Padding/1                      0.806 ms        0.806 ms          827
        +Buddy_BottomHat2D_Constant_Padding/1                   0.820 ms        0.820 ms          852
        +OpenCV_Erode2D_Constant_Padding/1                      0.137 ms        0.137 ms         5096
        +OpenCV_Opening2D_Constant_Padding/1                    0.223 ms        0.223 ms         3136
        +OpenCV_Closing2D_Constant_Padding/1                    0.227 ms        0.227 ms         3085
        +OpenCV_TopHat2D_Constant_Padding/1                     0.260 ms        0.260 ms         2693
        +OpenCV_BottomHat2D_Constant_Padding/1                  0.260 ms        0.260 ms         2686
        +OpenCV_MorphGrad2D_Constant_Padding/1                  0.254 ms        0.254 ms         2746
        +OpenCV_Dilate2D_Constant_Padding/1                     0.134 ms        0.134 ms         5208
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +
        \ No newline at end of file diff --git a/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index f51cdc17..5b9a66e0 100644 --- a/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -7,30 +7,89 @@ tr:nth-child(even){background:#fafafa} details{border:1px solid #ccc;border-radius:.4rem;padding:.6rem} summary{font-weight:600;cursor:pointer} +.err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

        imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-05-26 22:41:02 UTC

        +

        imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-06-01 10:22:14 UTC

        AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        - - - - - - - - - - - - - - - - - - - - - - -
        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/111.911.960
        MLIR_Conv2D/129.929.924
        Buddy_Conv2D/11.01.0738
        Buddy_Corr2D_Constant_Padding/11.81.8393
        OpenCV_Filter2D_Constant_Padding/12.82.8255
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.10.14756
        Buddy_Resize2D_Bilinear_Interpolation/10.30.32630
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.00.0103027
        OpenCV_Resize2D_Bilinear_Interpolation/10.00.048458
        Buddy_Erosion2D_Constant_Padding/10.20.23152
        Buddy_Dilation2D_Constant_Padding/10.20.23142
        Buddy_Opening2D_Constant_Padding/10.30.32069
        Buddy_Closing2D_Constant_Padding/10.30.32004
        Buddy_TopHat2D_Constant_Padding/10.90.9753
        Buddy_BottomHat2D_Constant_Padding/10.90.9762
        OpenCV_Erode2D_Constant_Padding/10.10.14961
        OpenCV_Opening2D_Constant_Padding/10.20.23152
        OpenCV_Closing2D_Constant_Padding/10.20.22470
        OpenCV_TopHat2D_Constant_Padding/10.30.32673
        OpenCV_BottomHat2D_Constant_Padding/10.30.32749
        OpenCV_MorphGrad2D_Constant_Padding/10.30.32717
        OpenCV_Dilate2D_Constant_Padding/10.10.15082
        \ No newline at end of file + + + + + + + + + + + + + + + + + + + + + + +
        NameTime (ms)CPU (ms)Iterations
        Eigen_Convolve2D/111.611.661
        MLIR_Conv2D/1292925
        Buddy_Conv2D/11.021.02685
        Buddy_Corr2D_Constant_Padding/11.751.75400
        OpenCV_Filter2D_Constant_Padding/12.682.68261
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1420.1424,858
        Buddy_Resize2D_Bilinear_Interpolation/10.260.262,692
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006650.00665105,372
        OpenCV_Resize2D_Bilinear_Interpolation/10.0140.01449,847
        Buddy_Erosion2D_Constant_Padding/10.2140.2143,249
        Buddy_Dilation2D_Constant_Padding/10.2130.2133,265
        Buddy_Opening2D_Constant_Padding/10.3140.3142,214
        Buddy_Closing2D_Constant_Padding/10.3080.3082,229
        Buddy_TopHat2D_Constant_Padding/10.790.79828
        Buddy_BottomHat2D_Constant_Padding/10.7770.777854
        OpenCV_Erode2D_Constant_Padding/10.1370.1375,075
        OpenCV_Opening2D_Constant_Padding/10.2250.2253,111
        OpenCV_Closing2D_Constant_Padding/10.2290.2293,056
        OpenCV_TopHat2D_Constant_Padding/10.2620.2622,672
        OpenCV_BottomHat2D_Constant_Padding/10.2640.2642,653
        OpenCV_MorphGrad2D_Constant_Padding/10.2540.2542,750
        OpenCV_Dilate2D_Constant_Padding/10.1350.1355,201
        +
        Console output +
        2025-06-01T10:10:41+00:00
        +Running ./bin/image-processing-benchmark
        +Run on (24 X 5100 MHz CPU s)
        +CPU Caches:
        +  L1 Data 48 KiB (x12)
        +  L1 Instruction 32 KiB (x12)
        +  L2 Unified 1280 KiB (x12)
        +  L3 Unified 30720 KiB (x1)
        +Load Average: 1.00, 1.10, 1.79
        +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        +--------------------------------------------------------------------------------------------
        +Benchmark                                                  Time             CPU   Iterations
        +--------------------------------------------------------------------------------------------
        +Eigen_Convolve2D/1                                      11.6 ms         11.6 ms           61
        +MLIR_Conv2D/1                                           29.0 ms         29.0 ms           25
        +Buddy_Conv2D/1                                          1.02 ms         1.02 ms          685
        +Buddy_Corr2D_Constant_Padding/1                         1.75 ms         1.75 ms          400
        +OpenCV_Filter2D_Constant_Padding/1                      2.68 ms         2.68 ms          261
        +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.142 ms        0.142 ms         4858
        +Buddy_Resize2D_Bilinear_Interpolation/1                0.260 ms        0.260 ms         2692
        +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       105372
        +OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49847
        +Buddy_Erosion2D_Constant_Padding/1                     0.214 ms        0.214 ms         3249
        +Buddy_Dilation2D_Constant_Padding/1                    0.213 ms        0.213 ms         3265
        +Buddy_Opening2D_Constant_Padding/1                     0.314 ms        0.314 ms         2214
        +Buddy_Closing2D_Constant_Padding/1                     0.308 ms        0.308 ms         2229
        +Buddy_TopHat2D_Constant_Padding/1                      0.790 ms        0.790 ms          828
        +Buddy_BottomHat2D_Constant_Padding/1                   0.777 ms        0.777 ms          854
        +OpenCV_Erode2D_Constant_Padding/1                      0.137 ms        0.137 ms         5075
        +OpenCV_Opening2D_Constant_Padding/1                    0.225 ms        0.225 ms         3111
        +OpenCV_Closing2D_Constant_Padding/1                    0.229 ms        0.229 ms         3056
        +OpenCV_TopHat2D_Constant_Padding/1                     0.262 ms        0.262 ms         2672
        +OpenCV_BottomHat2D_Constant_Padding/1                  0.264 ms        0.264 ms         2653
        +OpenCV_MorphGrad2D_Constant_Padding/1                  0.254 ms        0.254 ms         2750
        +OpenCV_Dilate2D_Constant_Padding/1                     0.135 ms        0.135 ms         5201
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +
        \ No newline at end of file diff --git a/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index a804169e..5db498f4 100644 --- a/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -7,30 +7,89 @@ tr:nth-child(even){background:#fafafa} details{border:1px solid #ccc;border-radius:.4rem;padding:.6rem} summary{font-weight:600;cursor:pointer} +.err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

        imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:41:02 UTC

        +

        imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-06-01 10:22:14 UTC

        AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        - - - - - - - - - - - - - - - - - - - - - - -
        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/14.94.9144
        MLIR_Conv2D/17.47.495
        Buddy_Conv2D/10.30.32234
        Buddy_Corr2D_Constant_Padding/10.80.8850
        OpenCV_Filter2D_Constant_Padding/11.31.3546
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.10.14747
        Buddy_Resize2D_Bilinear_Interpolation/10.30.32629
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.00.0102542
        OpenCV_Resize2D_Bilinear_Interpolation/10.00.048907
        Buddy_Erosion2D_Constant_Padding/10.20.23132
        Buddy_Dilation2D_Constant_Padding/10.20.23121
        Buddy_Opening2D_Constant_Padding/10.30.32059
        Buddy_Closing2D_Constant_Padding/10.30.32107
        Buddy_TopHat2D_Constant_Padding/10.90.9725
        Buddy_BottomHat2D_Constant_Padding/10.90.9744
        OpenCV_Erode2D_Constant_Padding/10.10.15040
        OpenCV_Opening2D_Constant_Padding/10.20.23222
        OpenCV_Closing2D_Constant_Padding/10.20.23151
        OpenCV_TopHat2D_Constant_Padding/10.30.32758
        OpenCV_BottomHat2D_Constant_Padding/10.30.32688
        OpenCV_MorphGrad2D_Constant_Padding/10.30.32740
        OpenCV_Dilate2D_Constant_Padding/10.10.14990
        \ No newline at end of file + + + + + + + + + + + + + + + + + + + + + + +
        NameTime (ms)CPU (ms)Iterations
        Eigen_Convolve2D/14.874.87144
        MLIR_Conv2D/17.387.3895
        Buddy_Conv2D/10.3120.3122,241
        Buddy_Corr2D_Constant_Padding/10.8210.821849
        OpenCV_Filter2D_Constant_Padding/11.281.28547
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1460.1464,774
        Buddy_Resize2D_Bilinear_Interpolation/10.2670.2672,627
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.00670.0067103,069
        OpenCV_Resize2D_Bilinear_Interpolation/10.0140.01449,911
        Buddy_Erosion2D_Constant_Padding/10.2140.2143,259
        Buddy_Dilation2D_Constant_Padding/10.2150.2153,230
        Buddy_Opening2D_Constant_Padding/10.3090.3092,255
        Buddy_Closing2D_Constant_Padding/10.3090.3092,273
        Buddy_TopHat2D_Constant_Padding/10.7760.776855
        Buddy_BottomHat2D_Constant_Padding/10.7740.774856
        OpenCV_Erode2D_Constant_Padding/10.1360.1365,148
        OpenCV_Opening2D_Constant_Padding/10.2190.2193,185
        OpenCV_Closing2D_Constant_Padding/10.2230.2233,143
        OpenCV_TopHat2D_Constant_Padding/10.2590.2592,699
        OpenCV_BottomHat2D_Constant_Padding/10.2580.2582,714
        OpenCV_MorphGrad2D_Constant_Padding/10.2510.2512,791
        OpenCV_Dilate2D_Constant_Padding/10.1370.1375,112
        +
        Console output +
        2025-06-01T10:05:28+00:00
        +Running ./bin/image-processing-benchmark
        +Run on (24 X 5100 MHz CPU s)
        +CPU Caches:
        +  L1 Data 48 KiB (x12)
        +  L1 Instruction 32 KiB (x12)
        +  L2 Unified 1280 KiB (x12)
        +  L3 Unified 30720 KiB (x1)
        +Load Average: 1.19, 1.29, 2.11
        +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        +--------------------------------------------------------------------------------------------
        +Benchmark                                                  Time             CPU   Iterations
        +--------------------------------------------------------------------------------------------
        +Eigen_Convolve2D/1                                      4.87 ms         4.87 ms          144
        +MLIR_Conv2D/1                                           7.38 ms         7.38 ms           95
        +Buddy_Conv2D/1                                         0.312 ms        0.312 ms         2241
        +Buddy_Corr2D_Constant_Padding/1                        0.821 ms        0.821 ms          849
        +OpenCV_Filter2D_Constant_Padding/1                      1.28 ms         1.28 ms          547
        +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.146 ms        0.146 ms         4774
        +Buddy_Resize2D_Bilinear_Interpolation/1                0.267 ms        0.267 ms         2627
        +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       103069
        +OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49911
        +Buddy_Erosion2D_Constant_Padding/1                     0.214 ms        0.214 ms         3259
        +Buddy_Dilation2D_Constant_Padding/1                    0.215 ms        0.215 ms         3230
        +Buddy_Opening2D_Constant_Padding/1                     0.309 ms        0.309 ms         2255
        +Buddy_Closing2D_Constant_Padding/1                     0.309 ms        0.309 ms         2273
        +Buddy_TopHat2D_Constant_Padding/1                      0.776 ms        0.776 ms          855
        +Buddy_BottomHat2D_Constant_Padding/1                   0.774 ms        0.774 ms          856
        +OpenCV_Erode2D_Constant_Padding/1                      0.136 ms        0.136 ms         5148
        +OpenCV_Opening2D_Constant_Padding/1                    0.219 ms        0.219 ms         3185
        +OpenCV_Closing2D_Constant_Padding/1                    0.223 ms        0.223 ms         3143
        +OpenCV_TopHat2D_Constant_Padding/1                     0.259 ms        0.259 ms         2699
        +OpenCV_BottomHat2D_Constant_Padding/1                  0.258 ms        0.258 ms         2714
        +OpenCV_MorphGrad2D_Constant_Padding/1                  0.251 ms        0.251 ms         2791
        +OpenCV_Dilate2D_Constant_Padding/1                     0.137 ms        0.137 ms         5112
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +
        \ No newline at end of file diff --git a/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index f4c10989..7d89d00b 100644 --- a/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -7,30 +7,89 @@ tr:nth-child(even){background:#fafafa} details{border:1px solid #ccc;border-radius:.4rem;padding:.6rem} summary{font-weight:600;cursor:pointer} +.err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

        imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-05-26 22:41:02 UTC

        +

        imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-06-01 10:22:14 UTC

        AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        - - - - - - - - - - - - - - - - - - - - - - -
        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/15.05.0140
        MLIR_Conv2D/17.47.495
        Buddy_Conv2D/10.30.32233
        Buddy_Corr2D_Constant_Padding/10.80.8856
        OpenCV_Filter2D_Constant_Padding/11.31.3547
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.10.14751
        Buddy_Resize2D_Bilinear_Interpolation/10.30.32631
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.00.0103135
        OpenCV_Resize2D_Bilinear_Interpolation/10.00.048881
        Buddy_Erosion2D_Constant_Padding/10.20.23158
        Buddy_Dilation2D_Constant_Padding/10.20.23109
        Buddy_Opening2D_Constant_Padding/10.30.32007
        Buddy_Closing2D_Constant_Padding/10.30.32062
        Buddy_TopHat2D_Constant_Padding/10.90.9732
        Buddy_BottomHat2D_Constant_Padding/10.90.9731
        OpenCV_Erode2D_Constant_Padding/10.10.15071
        OpenCV_Opening2D_Constant_Padding/10.20.23192
        OpenCV_Closing2D_Constant_Padding/10.20.23221
        OpenCV_TopHat2D_Constant_Padding/10.30.32767
        OpenCV_BottomHat2D_Constant_Padding/10.30.32752
        OpenCV_MorphGrad2D_Constant_Padding/10.30.32708
        OpenCV_Dilate2D_Constant_Padding/10.10.14910
        \ No newline at end of file + + + + + + + + + + + + + + + + + + + + + + +
        NameTime (ms)CPU (ms)Iterations
        Eigen_Convolve2D/14.94.9144
        MLIR_Conv2D/17.217.2197
        Buddy_Conv2D/10.3110.312,257
        Buddy_Corr2D_Constant_Padding/10.7980.798878
        OpenCV_Filter2D_Constant_Padding/11.251.25560
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,872
        Buddy_Resize2D_Bilinear_Interpolation/10.2620.2622,654
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006660.00666105,278
        OpenCV_Resize2D_Bilinear_Interpolation/10.0140.01449,913
        Buddy_Erosion2D_Constant_Padding/10.2140.2143,221
        Buddy_Dilation2D_Constant_Padding/10.2140.2143,271
        Buddy_Opening2D_Constant_Padding/10.3160.3162,213
        Buddy_Closing2D_Constant_Padding/10.3110.3112,230
        Buddy_TopHat2D_Constant_Padding/10.80.8866
        Buddy_BottomHat2D_Constant_Padding/10.7970.797846
        OpenCV_Erode2D_Constant_Padding/10.1380.1385,058
        OpenCV_Opening2D_Constant_Padding/10.2220.2223,149
        OpenCV_Closing2D_Constant_Padding/10.2210.2213,169
        OpenCV_TopHat2D_Constant_Padding/10.2570.2572,725
        OpenCV_BottomHat2D_Constant_Padding/10.2570.2572,715
        OpenCV_MorphGrad2D_Constant_Padding/10.250.252,798
        OpenCV_Dilate2D_Constant_Padding/10.1370.1375,116
        +
        Console output +
        2025-06-01T10:05:52+00:00
        +Running ./bin/image-processing-benchmark
        +Run on (24 X 5100 MHz CPU s)
        +CPU Caches:
        +  L1 Data 48 KiB (x12)
        +  L1 Instruction 32 KiB (x12)
        +  L2 Unified 1280 KiB (x12)
        +  L3 Unified 30720 KiB (x1)
        +Load Average: 1.20, 1.29, 2.09
        +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        +--------------------------------------------------------------------------------------------
        +Benchmark                                                  Time             CPU   Iterations
        +--------------------------------------------------------------------------------------------
        +Eigen_Convolve2D/1                                      4.90 ms         4.90 ms          144
        +MLIR_Conv2D/1                                           7.21 ms         7.21 ms           97
        +Buddy_Conv2D/1                                         0.311 ms        0.310 ms         2257
        +Buddy_Corr2D_Constant_Padding/1                        0.798 ms        0.798 ms          878
        +OpenCV_Filter2D_Constant_Padding/1                      1.25 ms         1.25 ms          560
        +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4872
        +Buddy_Resize2D_Bilinear_Interpolation/1                0.262 ms        0.262 ms         2654
        +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       105278
        +OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49913
        +Buddy_Erosion2D_Constant_Padding/1                     0.214 ms        0.214 ms         3221
        +Buddy_Dilation2D_Constant_Padding/1                    0.214 ms        0.214 ms         3271
        +Buddy_Opening2D_Constant_Padding/1                     0.316 ms        0.316 ms         2213
        +Buddy_Closing2D_Constant_Padding/1                     0.311 ms        0.311 ms         2230
        +Buddy_TopHat2D_Constant_Padding/1                      0.800 ms        0.800 ms          866
        +Buddy_BottomHat2D_Constant_Padding/1                   0.797 ms        0.797 ms          846
        +OpenCV_Erode2D_Constant_Padding/1                      0.138 ms        0.138 ms         5058
        +OpenCV_Opening2D_Constant_Padding/1                    0.222 ms        0.222 ms         3149
        +OpenCV_Closing2D_Constant_Padding/1                    0.221 ms        0.221 ms         3169
        +OpenCV_TopHat2D_Constant_Padding/1                     0.257 ms        0.257 ms         2725
        +OpenCV_BottomHat2D_Constant_Padding/1                  0.257 ms        0.257 ms         2715
        +OpenCV_MorphGrad2D_Constant_Padding/1                  0.250 ms        0.250 ms         2798
        +OpenCV_Dilate2D_Constant_Padding/1                     0.137 ms        0.137 ms         5116
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +
        \ No newline at end of file diff --git a/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 9b2f65b9..b7a6d9a1 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -7,30 +7,89 @@ tr:nth-child(even){background:#fafafa} details{border:1px solid #ccc;border-radius:.4rem;padding:.6rem} summary{font-weight:600;cursor:pointer} +.err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

        imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:41:02 UTC

        +

        imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-06-01 10:22:14 UTC

        AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        - - - - - - - - - - - - - - - - - - - - - - -
        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/15.05.0139
        MLIR_Conv2D/17.47.495
        Buddy_Conv2D/10.30.32239
        Buddy_Corr2D_Constant_Padding/10.80.8863
        OpenCV_Filter2D_Constant_Padding/11.31.3548
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.10.14797
        Buddy_Resize2D_Bilinear_Interpolation/10.30.32637
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.00.0103585
        OpenCV_Resize2D_Bilinear_Interpolation/10.00.048856
        Buddy_Erosion2D_Constant_Padding/10.20.23141
        Buddy_Dilation2D_Constant_Padding/10.20.23162
        Buddy_Opening2D_Constant_Padding/10.30.32082
        Buddy_Closing2D_Constant_Padding/10.30.32093
        Buddy_TopHat2D_Constant_Padding/10.90.9727
        Buddy_BottomHat2D_Constant_Padding/10.90.9732
        OpenCV_Erode2D_Constant_Padding/10.10.15002
        OpenCV_Opening2D_Constant_Padding/10.20.23104
        OpenCV_Closing2D_Constant_Padding/10.20.23100
        OpenCV_TopHat2D_Constant_Padding/10.30.32764
        OpenCV_BottomHat2D_Constant_Padding/10.30.32700
        OpenCV_MorphGrad2D_Constant_Padding/10.30.32725
        OpenCV_Dilate2D_Constant_Padding/10.10.14989
        \ No newline at end of file + + + + + + + + + + + + + + + + + + + + + + +
        NameTime (ms)CPU (ms)Iterations
        Eigen_Convolve2D/14.774.77147
        MLIR_Conv2D/17.27.297
        Buddy_Conv2D/10.310.312,252
        Buddy_Corr2D_Constant_Padding/10.8020.802868
        OpenCV_Filter2D_Constant_Padding/11.251.25560
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,854
        Buddy_Resize2D_Bilinear_Interpolation/10.2610.2612,649
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006690.00669105,099
        OpenCV_Resize2D_Bilinear_Interpolation/10.0140.01449,931
        Buddy_Erosion2D_Constant_Padding/10.2140.2143,253
        Buddy_Dilation2D_Constant_Padding/10.2140.2143,245
        Buddy_Opening2D_Constant_Padding/10.3160.3162,229
        Buddy_Closing2D_Constant_Padding/10.3130.3132,256
        Buddy_TopHat2D_Constant_Padding/10.8040.804822
        Buddy_BottomHat2D_Constant_Padding/10.7990.799842
        OpenCV_Erode2D_Constant_Padding/10.1350.1355,153
        OpenCV_Opening2D_Constant_Padding/10.220.223,158
        OpenCV_Closing2D_Constant_Padding/10.2210.2213,163
        OpenCV_TopHat2D_Constant_Padding/10.2570.2572,727
        OpenCV_BottomHat2D_Constant_Padding/10.2550.2552,742
        OpenCV_MorphGrad2D_Constant_Padding/10.2480.2482,826
        OpenCV_Dilate2D_Constant_Padding/10.1360.1365,148
        +
        Console output +
        2025-06-01T10:06:16+00:00
        +Running ./bin/image-processing-benchmark
        +Run on (24 X 5100 MHz CPU s)
        +CPU Caches:
        +  L1 Data 48 KiB (x12)
        +  L1 Instruction 32 KiB (x12)
        +  L2 Unified 1280 KiB (x12)
        +  L3 Unified 30720 KiB (x1)
        +Load Average: 1.13, 1.26, 2.06
        +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        +--------------------------------------------------------------------------------------------
        +Benchmark                                                  Time             CPU   Iterations
        +--------------------------------------------------------------------------------------------
        +Eigen_Convolve2D/1                                      4.77 ms         4.77 ms          147
        +MLIR_Conv2D/1                                           7.20 ms         7.20 ms           97
        +Buddy_Conv2D/1                                         0.310 ms        0.310 ms         2252
        +Buddy_Corr2D_Constant_Padding/1                        0.802 ms        0.802 ms          868
        +OpenCV_Filter2D_Constant_Padding/1                      1.25 ms         1.25 ms          560
        +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4854
        +Buddy_Resize2D_Bilinear_Interpolation/1                0.261 ms        0.261 ms         2649
        +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       105099
        +OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49931
        +Buddy_Erosion2D_Constant_Padding/1                     0.214 ms        0.214 ms         3253
        +Buddy_Dilation2D_Constant_Padding/1                    0.214 ms        0.214 ms         3245
        +Buddy_Opening2D_Constant_Padding/1                     0.316 ms        0.316 ms         2229
        +Buddy_Closing2D_Constant_Padding/1                     0.313 ms        0.313 ms         2256
        +Buddy_TopHat2D_Constant_Padding/1                      0.804 ms        0.804 ms          822
        +Buddy_BottomHat2D_Constant_Padding/1                   0.799 ms        0.799 ms          842
        +OpenCV_Erode2D_Constant_Padding/1                      0.135 ms        0.135 ms         5153
        +OpenCV_Opening2D_Constant_Padding/1                    0.220 ms        0.220 ms         3158
        +OpenCV_Closing2D_Constant_Padding/1                    0.221 ms        0.221 ms         3163
        +OpenCV_TopHat2D_Constant_Padding/1                     0.257 ms        0.257 ms         2727
        +OpenCV_BottomHat2D_Constant_Padding/1                  0.255 ms        0.255 ms         2742
        +OpenCV_MorphGrad2D_Constant_Padding/1                  0.248 ms        0.248 ms         2826
        +OpenCV_Dilate2D_Constant_Padding/1                     0.136 ms        0.136 ms         5148
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +
        \ No newline at end of file diff --git a/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 9e39593b..49aab0f9 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -7,30 +7,89 @@ tr:nth-child(even){background:#fafafa} details{border:1px solid #ccc;border-radius:.4rem;padding:.6rem} summary{font-weight:600;cursor:pointer} +.err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

        imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-05-26 22:41:02 UTC

        +

        imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-06-01 10:22:14 UTC

        AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        - - - - - - - - - - - - - - - - - - - - - - -
        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/15.05.0140
        MLIR_Conv2D/17.47.495
        Buddy_Conv2D/10.30.32227
        Buddy_Corr2D_Constant_Padding/10.80.8857
        OpenCV_Filter2D_Constant_Padding/11.31.3548
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.10.14786
        Buddy_Resize2D_Bilinear_Interpolation/10.30.32621
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.00.0103139
        OpenCV_Resize2D_Bilinear_Interpolation/10.00.048916
        Buddy_Erosion2D_Constant_Padding/10.20.23055
        Buddy_Dilation2D_Constant_Padding/10.30.33073
        Buddy_Opening2D_Constant_Padding/10.40.41855
        Buddy_Closing2D_Constant_Padding/10.40.42033
        Buddy_TopHat2D_Constant_Padding/10.90.9727
        Buddy_BottomHat2D_Constant_Padding/10.90.9732
        OpenCV_Erode2D_Constant_Padding/10.10.15021
        OpenCV_Opening2D_Constant_Padding/10.20.23154
        OpenCV_Closing2D_Constant_Padding/10.20.23178
        OpenCV_TopHat2D_Constant_Padding/10.30.32759
        OpenCV_BottomHat2D_Constant_Padding/10.30.32757
        OpenCV_MorphGrad2D_Constant_Padding/10.30.32734
        OpenCV_Dilate2D_Constant_Padding/10.10.15010
        \ No newline at end of file + + + + + + + + + + + + + + + + + + + + + + +
        NameTime (ms)CPU (ms)Iterations
        Eigen_Convolve2D/14.914.91143
        MLIR_Conv2D/17.177.1798
        Buddy_Conv2D/10.310.312,260
        Buddy_Corr2D_Constant_Padding/10.7950.795875
        OpenCV_Filter2D_Constant_Padding/11.251.25560
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,871
        Buddy_Resize2D_Bilinear_Interpolation/10.2610.2612,651
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006690.00669104,620
        OpenCV_Resize2D_Bilinear_Interpolation/10.0140.01449,783
        Buddy_Erosion2D_Constant_Padding/10.2180.2183,101
        Buddy_Dilation2D_Constant_Padding/10.2140.2143,270
        Buddy_Opening2D_Constant_Padding/10.3190.3192,180
        Buddy_Closing2D_Constant_Padding/10.3120.3122,262
        Buddy_TopHat2D_Constant_Padding/10.8140.814841
        Buddy_BottomHat2D_Constant_Padding/10.820.82849
        OpenCV_Erode2D_Constant_Padding/10.1350.1355,157
        OpenCV_Opening2D_Constant_Padding/10.2190.2193,187
        OpenCV_Closing2D_Constant_Padding/10.2180.2183,207
        OpenCV_TopHat2D_Constant_Padding/10.2550.2552,745
        OpenCV_BottomHat2D_Constant_Padding/10.2530.2532,766
        OpenCV_MorphGrad2D_Constant_Padding/10.2490.2492,808
        OpenCV_Dilate2D_Constant_Padding/10.1370.1375,102
        +
        Console output +
        2025-06-01T10:06:40+00:00
        +Running ./bin/image-processing-benchmark
        +Run on (24 X 5100 MHz CPU s)
        +CPU Caches:
        +  L1 Data 48 KiB (x12)
        +  L1 Instruction 32 KiB (x12)
        +  L2 Unified 1280 KiB (x12)
        +  L3 Unified 30720 KiB (x1)
        +Load Average: 1.08, 1.24, 2.03
        +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        +--------------------------------------------------------------------------------------------
        +Benchmark                                                  Time             CPU   Iterations
        +--------------------------------------------------------------------------------------------
        +Eigen_Convolve2D/1                                      4.91 ms         4.91 ms          143
        +MLIR_Conv2D/1                                           7.17 ms         7.17 ms           98
        +Buddy_Conv2D/1                                         0.310 ms        0.310 ms         2260
        +Buddy_Corr2D_Constant_Padding/1                        0.795 ms        0.795 ms          875
        +OpenCV_Filter2D_Constant_Padding/1                      1.25 ms         1.25 ms          560
        +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4871
        +Buddy_Resize2D_Bilinear_Interpolation/1                0.261 ms        0.261 ms         2651
        +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       104620
        +OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49783
        +Buddy_Erosion2D_Constant_Padding/1                     0.218 ms        0.218 ms         3101
        +Buddy_Dilation2D_Constant_Padding/1                    0.214 ms        0.214 ms         3270
        +Buddy_Opening2D_Constant_Padding/1                     0.319 ms        0.319 ms         2180
        +Buddy_Closing2D_Constant_Padding/1                     0.312 ms        0.312 ms         2262
        +Buddy_TopHat2D_Constant_Padding/1                      0.814 ms        0.814 ms          841
        +Buddy_BottomHat2D_Constant_Padding/1                   0.820 ms        0.820 ms          849
        +OpenCV_Erode2D_Constant_Padding/1                      0.135 ms        0.135 ms         5157
        +OpenCV_Opening2D_Constant_Padding/1                    0.219 ms        0.219 ms         3187
        +OpenCV_Closing2D_Constant_Padding/1                    0.218 ms        0.218 ms         3207
        +OpenCV_TopHat2D_Constant_Padding/1                     0.255 ms        0.255 ms         2745
        +OpenCV_BottomHat2D_Constant_Padding/1                  0.253 ms        0.253 ms         2766
        +OpenCV_MorphGrad2D_Constant_Padding/1                  0.249 ms        0.249 ms         2808
        +OpenCV_Dilate2D_Constant_Padding/1                     0.137 ms        0.137 ms         5102
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +
        \ No newline at end of file diff --git a/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 1dbfa770..4f7ad6fd 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -7,30 +7,89 @@ tr:nth-child(even){background:#fafafa} details{border:1px solid #ccc;border-radius:.4rem;padding:.6rem} summary{font-weight:600;cursor:pointer} +.err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

        imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:41:02 UTC

        +

        imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-06-01 10:22:14 UTC

        AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        - - - - - - - - - - - - - - - - - - - - - - -
        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/111.811.860
        MLIR_Conv2D/129.929.923
        Buddy_Conv2D/11.31.3524
        Buddy_Corr2D_Constant_Padding/12.42.4294
        OpenCV_Filter2D_Constant_Padding/14.24.2166
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.10.14766
        Buddy_Resize2D_Bilinear_Interpolation/10.30.32632
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.00.0102992
        OpenCV_Resize2D_Bilinear_Interpolation/10.00.048914
        Buddy_Erosion2D_Constant_Padding/10.20.23110
        Buddy_Dilation2D_Constant_Padding/10.20.23094
        Buddy_Opening2D_Constant_Padding/10.40.41992
        Buddy_Closing2D_Constant_Padding/10.30.31972
        Buddy_TopHat2D_Constant_Padding/10.90.9731
        Buddy_BottomHat2D_Constant_Padding/10.90.9740
        OpenCV_Erode2D_Constant_Padding/10.10.15054
        OpenCV_Opening2D_Constant_Padding/10.20.23141
        OpenCV_Closing2D_Constant_Padding/10.20.23119
        OpenCV_TopHat2D_Constant_Padding/10.30.32692
        OpenCV_BottomHat2D_Constant_Padding/10.30.32695
        OpenCV_MorphGrad2D_Constant_Padding/10.30.32680
        OpenCV_Dilate2D_Constant_Padding/10.10.15013
        \ No newline at end of file + + + + + + + + + + + + + + + + + + + + + + +
        NameTime (ms)CPU (ms)Iterations
        Eigen_Convolve2D/111.611.660
        MLIR_Conv2D/129.229.224
        Buddy_Conv2D/11.311.31536
        Buddy_Corr2D_Constant_Padding/12.332.33300
        OpenCV_Filter2D_Constant_Padding/14.114.11170
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,854
        Buddy_Resize2D_Bilinear_Interpolation/10.2610.2612,689
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006650.00665105,080
        OpenCV_Resize2D_Bilinear_Interpolation/10.0140.01449,721
        Buddy_Erosion2D_Constant_Padding/10.2160.2163,235
        Buddy_Dilation2D_Constant_Padding/10.2140.2143,260
        Buddy_Opening2D_Constant_Padding/10.310.312,249
        Buddy_Closing2D_Constant_Padding/10.3120.3122,139
        Buddy_TopHat2D_Constant_Padding/10.780.78826
        Buddy_BottomHat2D_Constant_Padding/10.7820.782830
        OpenCV_Erode2D_Constant_Padding/10.1380.1385,049
        OpenCV_Opening2D_Constant_Padding/10.2260.2263,095
        OpenCV_Closing2D_Constant_Padding/10.2250.2253,109
        OpenCV_TopHat2D_Constant_Padding/10.260.262,690
        OpenCV_BottomHat2D_Constant_Padding/10.260.262,688
        OpenCV_MorphGrad2D_Constant_Padding/10.2530.2532,759
        OpenCV_Dilate2D_Constant_Padding/10.1360.1365,116
        +
        Console output +
        2025-06-01T10:07:04+00:00
        +Running ./bin/image-processing-benchmark
        +Run on (24 X 5100 MHz CPU s)
        +CPU Caches:
        +  L1 Data 48 KiB (x12)
        +  L1 Instruction 32 KiB (x12)
        +  L2 Unified 1280 KiB (x12)
        +  L3 Unified 30720 KiB (x1)
        +Load Average: 1.05, 1.22, 2.00
        +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        +--------------------------------------------------------------------------------------------
        +Benchmark                                                  Time             CPU   Iterations
        +--------------------------------------------------------------------------------------------
        +Eigen_Convolve2D/1                                      11.6 ms         11.6 ms           60
        +MLIR_Conv2D/1                                           29.2 ms         29.2 ms           24
        +Buddy_Conv2D/1                                          1.31 ms         1.31 ms          536
        +Buddy_Corr2D_Constant_Padding/1                         2.33 ms         2.33 ms          300
        +OpenCV_Filter2D_Constant_Padding/1                      4.11 ms         4.11 ms          170
        +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4854
        +Buddy_Resize2D_Bilinear_Interpolation/1                0.261 ms        0.261 ms         2689
        +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       105080
        +OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49721
        +Buddy_Erosion2D_Constant_Padding/1                     0.216 ms        0.216 ms         3235
        +Buddy_Dilation2D_Constant_Padding/1                    0.214 ms        0.214 ms         3260
        +Buddy_Opening2D_Constant_Padding/1                     0.310 ms        0.310 ms         2249
        +Buddy_Closing2D_Constant_Padding/1                     0.312 ms        0.312 ms         2139
        +Buddy_TopHat2D_Constant_Padding/1                      0.780 ms        0.780 ms          826
        +Buddy_BottomHat2D_Constant_Padding/1                   0.782 ms        0.782 ms          830
        +OpenCV_Erode2D_Constant_Padding/1                      0.138 ms        0.138 ms         5049
        +OpenCV_Opening2D_Constant_Padding/1                    0.226 ms        0.226 ms         3095
        +OpenCV_Closing2D_Constant_Padding/1                    0.225 ms        0.225 ms         3109
        +OpenCV_TopHat2D_Constant_Padding/1                     0.260 ms        0.260 ms         2690
        +OpenCV_BottomHat2D_Constant_Padding/1                  0.260 ms        0.260 ms         2688
        +OpenCV_MorphGrad2D_Constant_Padding/1                  0.253 ms        0.253 ms         2759
        +OpenCV_Dilate2D_Constant_Padding/1                     0.136 ms        0.136 ms         5116
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +
        \ No newline at end of file diff --git a/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index b663778c..242d00ec 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -7,30 +7,89 @@ tr:nth-child(even){background:#fafafa} details{border:1px solid #ccc;border-radius:.4rem;padding:.6rem} summary{font-weight:600;cursor:pointer} +.err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

        imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-05-26 22:41:02 UTC

        +

        imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-06-01 10:22:14 UTC

        AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        - - - - - - - - - - - - - - - - - - - - - - -
        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/111.811.859
        MLIR_Conv2D/129.929.824
        Buddy_Conv2D/11.31.3542
        Buddy_Corr2D_Constant_Padding/12.42.4293
        OpenCV_Filter2D_Constant_Padding/14.24.2166
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.10.14777
        Buddy_Resize2D_Bilinear_Interpolation/10.30.32635
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.00.0102983
        OpenCV_Resize2D_Bilinear_Interpolation/10.00.048881
        Buddy_Erosion2D_Constant_Padding/10.20.23156
        Buddy_Dilation2D_Constant_Padding/10.20.23159
        Buddy_Opening2D_Constant_Padding/10.30.32113
        Buddy_Closing2D_Constant_Padding/10.30.32087
        Buddy_TopHat2D_Constant_Padding/10.90.9736
        Buddy_BottomHat2D_Constant_Padding/10.90.9740
        OpenCV_Erode2D_Constant_Padding/10.10.15026
        OpenCV_Opening2D_Constant_Padding/10.20.23102
        OpenCV_Closing2D_Constant_Padding/10.20.23074
        OpenCV_TopHat2D_Constant_Padding/10.30.32684
        OpenCV_BottomHat2D_Constant_Padding/10.30.32660
        OpenCV_MorphGrad2D_Constant_Padding/10.30.32685
        OpenCV_Dilate2D_Constant_Padding/10.10.14970
        \ No newline at end of file + + + + + + + + + + + + + + + + + + + + + + +
        NameTime (ms)CPU (ms)Iterations
        Eigen_Convolve2D/111.511.560
        MLIR_Conv2D/129.129.124
        Buddy_Conv2D/11.381.38508
        Buddy_Corr2D_Constant_Padding/12.322.32301
        OpenCV_Filter2D_Constant_Padding/14.14.1170
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,857
        Buddy_Resize2D_Bilinear_Interpolation/10.2610.2612,686
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006660.00666105,064
        OpenCV_Resize2D_Bilinear_Interpolation/10.0140.01449,925
        Buddy_Erosion2D_Constant_Padding/10.2140.2143,267
        Buddy_Dilation2D_Constant_Padding/10.2140.2143,269
        Buddy_Opening2D_Constant_Padding/10.3160.3162,235
        Buddy_Closing2D_Constant_Padding/10.3150.3152,209
        Buddy_TopHat2D_Constant_Padding/10.8010.801841
        Buddy_BottomHat2D_Constant_Padding/10.7850.785852
        OpenCV_Erode2D_Constant_Padding/10.1360.1365,129
        OpenCV_Opening2D_Constant_Padding/10.2250.2253,105
        OpenCV_Closing2D_Constant_Padding/10.2270.2273,082
        OpenCV_TopHat2D_Constant_Padding/10.2610.2612,679
        OpenCV_BottomHat2D_Constant_Padding/10.2620.2622,672
        OpenCV_MorphGrad2D_Constant_Padding/10.2540.2542,751
        OpenCV_Dilate2D_Constant_Padding/10.1360.1365,094
        +
        Console output +
        2025-06-01T10:07:28+00:00
        +Running ./bin/image-processing-benchmark
        +Run on (24 X 5100 MHz CPU s)
        +CPU Caches:
        +  L1 Data 48 KiB (x12)
        +  L1 Instruction 32 KiB (x12)
        +  L2 Unified 1280 KiB (x12)
        +  L3 Unified 30720 KiB (x1)
        +Load Average: 1.03, 1.20, 1.98
        +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        +--------------------------------------------------------------------------------------------
        +Benchmark                                                  Time             CPU   Iterations
        +--------------------------------------------------------------------------------------------
        +Eigen_Convolve2D/1                                      11.5 ms         11.5 ms           60
        +MLIR_Conv2D/1                                           29.1 ms         29.1 ms           24
        +Buddy_Conv2D/1                                          1.38 ms         1.38 ms          508
        +Buddy_Corr2D_Constant_Padding/1                         2.32 ms         2.32 ms          301
        +OpenCV_Filter2D_Constant_Padding/1                      4.10 ms         4.10 ms          170
        +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4857
        +Buddy_Resize2D_Bilinear_Interpolation/1                0.261 ms        0.261 ms         2686
        +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       105064
        +OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49925
        +Buddy_Erosion2D_Constant_Padding/1                     0.214 ms        0.214 ms         3267
        +Buddy_Dilation2D_Constant_Padding/1                    0.214 ms        0.214 ms         3269
        +Buddy_Opening2D_Constant_Padding/1                     0.316 ms        0.316 ms         2235
        +Buddy_Closing2D_Constant_Padding/1                     0.315 ms        0.315 ms         2209
        +Buddy_TopHat2D_Constant_Padding/1                      0.801 ms        0.801 ms          841
        +Buddy_BottomHat2D_Constant_Padding/1                   0.785 ms        0.785 ms          852
        +OpenCV_Erode2D_Constant_Padding/1                      0.136 ms        0.136 ms         5129
        +OpenCV_Opening2D_Constant_Padding/1                    0.225 ms        0.225 ms         3105
        +OpenCV_Closing2D_Constant_Padding/1                    0.227 ms        0.227 ms         3082
        +OpenCV_TopHat2D_Constant_Padding/1                     0.261 ms        0.261 ms         2679
        +OpenCV_BottomHat2D_Constant_Padding/1                  0.262 ms        0.262 ms         2672
        +OpenCV_MorphGrad2D_Constant_Padding/1                  0.254 ms        0.254 ms         2751
        +OpenCV_Dilate2D_Constant_Padding/1                     0.136 ms        0.136 ms         5094
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +
        \ No newline at end of file diff --git a/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 1f562044..e3fc5ad2 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -7,30 +7,89 @@ tr:nth-child(even){background:#fafafa} details{border:1px solid #ccc;border-radius:.4rem;padding:.6rem} summary{font-weight:600;cursor:pointer} +.err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

        imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:41:02 UTC

        +

        imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-06-01 10:22:14 UTC

        AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        - - - - - - - - - - - - - - - - - - - - - - -
        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/122.222.231
        MLIR_Conv2D/168.168.110
        Buddy_Conv2D/12.32.3300
        Buddy_Corr2D_Constant_Padding/14.74.7148
        OpenCV_Filter2D_Constant_Padding/18.88.879
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.10.14765
        Buddy_Resize2D_Bilinear_Interpolation/10.30.32606
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.00.0103232
        OpenCV_Resize2D_Bilinear_Interpolation/10.00.048866
        Buddy_Erosion2D_Constant_Padding/10.20.23108
        Buddy_Dilation2D_Constant_Padding/10.20.23124
        Buddy_Opening2D_Constant_Padding/10.30.31961
        Buddy_Closing2D_Constant_Padding/10.30.32094
        Buddy_TopHat2D_Constant_Padding/10.90.9764
        Buddy_BottomHat2D_Constant_Padding/10.90.9784
        OpenCV_Erode2D_Constant_Padding/10.10.14984
        OpenCV_Opening2D_Constant_Padding/10.20.23103
        OpenCV_Closing2D_Constant_Padding/10.20.23089
        OpenCV_TopHat2D_Constant_Padding/10.30.32707
        OpenCV_BottomHat2D_Constant_Padding/10.30.32743
        OpenCV_MorphGrad2D_Constant_Padding/10.30.32695
        OpenCV_Dilate2D_Constant_Padding/10.10.14997
        \ No newline at end of file + + + + + + + + + + + + + + + + + + + + + + +
        NameTime (ms)CPU (ms)Iterations
        Eigen_Convolve2D/121.521.531
        MLIR_Conv2D/166.766.711
        Buddy_Conv2D/12.242.24312
        Buddy_Corr2D_Constant_Padding/14.674.67150
        OpenCV_Filter2D_Constant_Padding/18.618.6181
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,847
        Buddy_Resize2D_Bilinear_Interpolation/10.2610.2612,686
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006650.00665105,200
        OpenCV_Resize2D_Bilinear_Interpolation/10.01410.014149,717
        Buddy_Erosion2D_Constant_Padding/10.2130.2133,275
        Buddy_Dilation2D_Constant_Padding/10.2140.2143,279
        Buddy_Opening2D_Constant_Padding/10.310.312,249
        Buddy_Closing2D_Constant_Padding/10.3140.3142,267
        Buddy_TopHat2D_Constant_Padding/10.7890.789827
        Buddy_BottomHat2D_Constant_Padding/10.7630.763845
        OpenCV_Erode2D_Constant_Padding/10.1350.1355,188
        OpenCV_Opening2D_Constant_Padding/10.2290.2293,054
        OpenCV_Closing2D_Constant_Padding/10.2290.2293,052
        OpenCV_TopHat2D_Constant_Padding/10.2620.2622,667
        OpenCV_BottomHat2D_Constant_Padding/10.2620.2622,674
        OpenCV_MorphGrad2D_Constant_Padding/10.2540.2542,759
        OpenCV_Dilate2D_Constant_Padding/10.1370.1375,114
        +
        Console output +
        2025-06-01T10:07:52+00:00
        +Running ./bin/image-processing-benchmark
        +Run on (24 X 5100 MHz CPU s)
        +CPU Caches:
        +  L1 Data 48 KiB (x12)
        +  L1 Instruction 32 KiB (x12)
        +  L2 Unified 1280 KiB (x12)
        +  L3 Unified 30720 KiB (x1)
        +Load Average: 1.02, 1.19, 1.96
        +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        +--------------------------------------------------------------------------------------------
        +Benchmark                                                  Time             CPU   Iterations
        +--------------------------------------------------------------------------------------------
        +Eigen_Convolve2D/1                                      21.5 ms         21.5 ms           31
        +MLIR_Conv2D/1                                           66.7 ms         66.7 ms           11
        +Buddy_Conv2D/1                                          2.24 ms         2.24 ms          312
        +Buddy_Corr2D_Constant_Padding/1                         4.67 ms         4.67 ms          150
        +OpenCV_Filter2D_Constant_Padding/1                      8.61 ms         8.61 ms           81
        +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4847
        +Buddy_Resize2D_Bilinear_Interpolation/1                0.261 ms        0.261 ms         2686
        +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       105200
        +OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49717
        +Buddy_Erosion2D_Constant_Padding/1                     0.213 ms        0.213 ms         3275
        +Buddy_Dilation2D_Constant_Padding/1                    0.214 ms        0.214 ms         3279
        +Buddy_Opening2D_Constant_Padding/1                     0.310 ms        0.310 ms         2249
        +Buddy_Closing2D_Constant_Padding/1                     0.314 ms        0.314 ms         2267
        +Buddy_TopHat2D_Constant_Padding/1                      0.789 ms        0.789 ms          827
        +Buddy_BottomHat2D_Constant_Padding/1                   0.763 ms        0.763 ms          845
        +OpenCV_Erode2D_Constant_Padding/1                      0.135 ms        0.135 ms         5188
        +OpenCV_Opening2D_Constant_Padding/1                    0.229 ms        0.229 ms         3054
        +OpenCV_Closing2D_Constant_Padding/1                    0.229 ms        0.229 ms         3052
        +OpenCV_TopHat2D_Constant_Padding/1                     0.262 ms        0.262 ms         2667
        +OpenCV_BottomHat2D_Constant_Padding/1                  0.262 ms        0.262 ms         2674
        +OpenCV_MorphGrad2D_Constant_Padding/1                  0.254 ms        0.254 ms         2759
        +OpenCV_Dilate2D_Constant_Padding/1                     0.137 ms        0.137 ms         5114
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +
        \ No newline at end of file diff --git a/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index f141accd..6ed9986a 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -7,30 +7,89 @@ tr:nth-child(even){background:#fafafa} details{border:1px solid #ccc;border-radius:.4rem;padding:.6rem} summary{font-weight:600;cursor:pointer} +.err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

        imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-05-26 22:41:02 UTC

        +

        imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-06-01 10:22:14 UTC

        AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        - - - - - - - - - - - - - - - - - - - - - - -
        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/122.022.032
        MLIR_Conv2D/168.168.110
        Buddy_Conv2D/12.22.2304
        Buddy_Corr2D_Constant_Padding/14.74.7149
        OpenCV_Filter2D_Constant_Padding/18.88.879
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.10.14719
        Buddy_Resize2D_Bilinear_Interpolation/10.30.32629
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.00.0102880
        OpenCV_Resize2D_Bilinear_Interpolation/10.00.048902
        Buddy_Erosion2D_Constant_Padding/10.20.23148
        Buddy_Dilation2D_Constant_Padding/10.20.23112
        Buddy_Opening2D_Constant_Padding/10.40.41703
        Buddy_Closing2D_Constant_Padding/10.40.41933
        Buddy_TopHat2D_Constant_Padding/10.90.9724
        Buddy_BottomHat2D_Constant_Padding/10.90.9704
        OpenCV_Erode2D_Constant_Padding/10.10.15045
        OpenCV_Opening2D_Constant_Padding/10.20.23039
        OpenCV_Closing2D_Constant_Padding/10.20.23127
        OpenCV_TopHat2D_Constant_Padding/10.30.32665
        OpenCV_BottomHat2D_Constant_Padding/10.30.32664
        OpenCV_MorphGrad2D_Constant_Padding/10.30.32666
        OpenCV_Dilate2D_Constant_Padding/10.10.14964
        \ No newline at end of file + + + + + + + + + + + + + + + + + + + + + + +
        NameTime (ms)CPU (ms)Iterations
        Eigen_Convolve2D/121.621.632
        MLIR_Conv2D/166.766.710
        Buddy_Conv2D/12.342.34299
        Buddy_Corr2D_Constant_Padding/14.674.67150
        OpenCV_Filter2D_Constant_Padding/18.68.681
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,853
        Buddy_Resize2D_Bilinear_Interpolation/10.260.262,693
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006660.00666105,142
        OpenCV_Resize2D_Bilinear_Interpolation/10.0140.01450,003
        Buddy_Erosion2D_Constant_Padding/10.2140.2143,229
        Buddy_Dilation2D_Constant_Padding/10.2170.2173,262
        Buddy_Opening2D_Constant_Padding/10.3080.3082,262
        Buddy_Closing2D_Constant_Padding/10.310.312,236
        Buddy_TopHat2D_Constant_Padding/10.7770.777855
        Buddy_BottomHat2D_Constant_Padding/10.7960.796826
        OpenCV_Erode2D_Constant_Padding/10.1360.1365,125
        OpenCV_Opening2D_Constant_Padding/10.2270.2273,079
        OpenCV_Closing2D_Constant_Padding/10.2260.2263,097
        OpenCV_TopHat2D_Constant_Padding/10.2610.2612,680
        OpenCV_BottomHat2D_Constant_Padding/10.260.262,694
        OpenCV_MorphGrad2D_Constant_Padding/10.2530.2532,766
        OpenCV_Dilate2D_Constant_Padding/10.140.144,993
        +
        Console output +
        2025-06-01T10:08:16+00:00
        +Running ./bin/image-processing-benchmark
        +Run on (24 X 5100 MHz CPU s)
        +CPU Caches:
        +  L1 Data 48 KiB (x12)
        +  L1 Instruction 32 KiB (x12)
        +  L2 Unified 1280 KiB (x12)
        +  L3 Unified 30720 KiB (x1)
        +Load Average: 1.02, 1.17, 1.93
        +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        +--------------------------------------------------------------------------------------------
        +Benchmark                                                  Time             CPU   Iterations
        +--------------------------------------------------------------------------------------------
        +Eigen_Convolve2D/1                                      21.6 ms         21.6 ms           32
        +MLIR_Conv2D/1                                           66.7 ms         66.7 ms           10
        +Buddy_Conv2D/1                                          2.34 ms         2.34 ms          299
        +Buddy_Corr2D_Constant_Padding/1                         4.67 ms         4.67 ms          150
        +OpenCV_Filter2D_Constant_Padding/1                      8.60 ms         8.60 ms           81
        +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4853
        +Buddy_Resize2D_Bilinear_Interpolation/1                0.260 ms        0.260 ms         2693
        +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       105142
        +OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        50003
        +Buddy_Erosion2D_Constant_Padding/1                     0.214 ms        0.214 ms         3229
        +Buddy_Dilation2D_Constant_Padding/1                    0.217 ms        0.217 ms         3262
        +Buddy_Opening2D_Constant_Padding/1                     0.308 ms        0.308 ms         2262
        +Buddy_Closing2D_Constant_Padding/1                     0.310 ms        0.310 ms         2236
        +Buddy_TopHat2D_Constant_Padding/1                      0.777 ms        0.777 ms          855
        +Buddy_BottomHat2D_Constant_Padding/1                   0.796 ms        0.796 ms          826
        +OpenCV_Erode2D_Constant_Padding/1                      0.136 ms        0.136 ms         5125
        +OpenCV_Opening2D_Constant_Padding/1                    0.227 ms        0.227 ms         3079
        +OpenCV_Closing2D_Constant_Padding/1                    0.226 ms        0.226 ms         3097
        +OpenCV_TopHat2D_Constant_Padding/1                     0.261 ms        0.261 ms         2680
        +OpenCV_BottomHat2D_Constant_Padding/1                  0.260 ms        0.260 ms         2694
        +OpenCV_MorphGrad2D_Constant_Padding/1                  0.253 ms        0.253 ms         2766
        +OpenCV_Dilate2D_Constant_Padding/1                     0.140 ms        0.140 ms         4993
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +
        \ No newline at end of file diff --git a/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index f6043071..28cb3cc4 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -7,30 +7,89 @@ tr:nth-child(even){background:#fafafa} details{border:1px solid #ccc;border-radius:.4rem;padding:.6rem} summary{font-weight:600;cursor:pointer} +.err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

        imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:41:02 UTC

        +

        imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-06-01 10:22:14 UTC

        AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        - - - - - - - - - - - - - - - - - - - - - - -
        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/135.235.220
        MLIR_Conv2D/1122.1122.16
        Buddy_Conv2D/14.24.2167
        Buddy_Corr2D_Constant_Padding/17.97.989
        OpenCV_Filter2D_Constant_Padding/15.95.9118
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.10.14760
        Buddy_Resize2D_Bilinear_Interpolation/10.30.32636
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.00.0103065
        OpenCV_Resize2D_Bilinear_Interpolation/10.00.047744
        Buddy_Erosion2D_Constant_Padding/10.20.23166
        Buddy_Dilation2D_Constant_Padding/10.20.23164
        Buddy_Opening2D_Constant_Padding/10.30.32083
        Buddy_Closing2D_Constant_Padding/10.30.32139
        Buddy_TopHat2D_Constant_Padding/10.90.9739
        Buddy_BottomHat2D_Constant_Padding/10.90.9755
        OpenCV_Erode2D_Constant_Padding/10.10.15029
        OpenCV_Opening2D_Constant_Padding/10.20.23140
        OpenCV_Closing2D_Constant_Padding/10.20.23204
        OpenCV_TopHat2D_Constant_Padding/10.30.32744
        OpenCV_BottomHat2D_Constant_Padding/10.30.32737
        OpenCV_MorphGrad2D_Constant_Padding/10.30.32177
        OpenCV_Dilate2D_Constant_Padding/10.10.15097
        \ No newline at end of file + + + + + + + + + + + + + + + + + + + + + + +
        NameTime (ms)CPU (ms)Iterations
        Eigen_Convolve2D/134.234.221
        MLIR_Conv2D/11191196
        Buddy_Conv2D/13.913.91179
        Buddy_Corr2D_Constant_Padding/17.797.7990
        OpenCV_Filter2D_Constant_Padding/15.895.89119
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1420.1424,837
        Buddy_Resize2D_Bilinear_Interpolation/10.260.262,692
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006650.00665105,099
        OpenCV_Resize2D_Bilinear_Interpolation/10.01420.014249,521
        Buddy_Erosion2D_Constant_Padding/10.2140.2143,257
        Buddy_Dilation2D_Constant_Padding/10.2160.2153,222
        Buddy_Opening2D_Constant_Padding/10.3290.3292,228
        Buddy_Closing2D_Constant_Padding/10.3140.3132,221
        Buddy_TopHat2D_Constant_Padding/10.7890.789845
        Buddy_BottomHat2D_Constant_Padding/10.7930.793825
        OpenCV_Erode2D_Constant_Padding/10.1370.1375,117
        OpenCV_Opening2D_Constant_Padding/10.220.223,176
        OpenCV_Closing2D_Constant_Padding/10.220.223,179
        OpenCV_TopHat2D_Constant_Padding/10.2540.2542,758
        OpenCV_BottomHat2D_Constant_Padding/10.2550.2552,740
        OpenCV_MorphGrad2D_Constant_Padding/10.2510.2512,779
        OpenCV_Dilate2D_Constant_Padding/10.1350.1355,176
        +
        Console output +
        2025-06-01T10:08:40+00:00
        +Running ./bin/image-processing-benchmark
        +Run on (24 X 5100 MHz CPU s)
        +CPU Caches:
        +  L1 Data 48 KiB (x12)
        +  L1 Instruction 32 KiB (x12)
        +  L2 Unified 1280 KiB (x12)
        +  L3 Unified 30720 KiB (x1)
        +Load Average: 1.01, 1.16, 1.90
        +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        +--------------------------------------------------------------------------------------------
        +Benchmark                                                  Time             CPU   Iterations
        +--------------------------------------------------------------------------------------------
        +Eigen_Convolve2D/1                                      34.2 ms         34.2 ms           21
        +MLIR_Conv2D/1                                            119 ms          119 ms            6
        +Buddy_Conv2D/1                                          3.91 ms         3.91 ms          179
        +Buddy_Corr2D_Constant_Padding/1                         7.79 ms         7.79 ms           90
        +OpenCV_Filter2D_Constant_Padding/1                      5.89 ms         5.89 ms          119
        +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.142 ms        0.142 ms         4837
        +Buddy_Resize2D_Bilinear_Interpolation/1                0.260 ms        0.260 ms         2692
        +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       105099
        +OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49521
        +Buddy_Erosion2D_Constant_Padding/1                     0.214 ms        0.214 ms         3257
        +Buddy_Dilation2D_Constant_Padding/1                    0.216 ms        0.215 ms         3222
        +Buddy_Opening2D_Constant_Padding/1                     0.329 ms        0.329 ms         2228
        +Buddy_Closing2D_Constant_Padding/1                     0.314 ms        0.313 ms         2221
        +Buddy_TopHat2D_Constant_Padding/1                      0.789 ms        0.789 ms          845
        +Buddy_BottomHat2D_Constant_Padding/1                   0.793 ms        0.793 ms          825
        +OpenCV_Erode2D_Constant_Padding/1                      0.137 ms        0.137 ms         5117
        +OpenCV_Opening2D_Constant_Padding/1                    0.220 ms        0.220 ms         3176
        +OpenCV_Closing2D_Constant_Padding/1                    0.220 ms        0.220 ms         3179
        +OpenCV_TopHat2D_Constant_Padding/1                     0.254 ms        0.254 ms         2758
        +OpenCV_BottomHat2D_Constant_Padding/1                  0.255 ms        0.255 ms         2740
        +OpenCV_MorphGrad2D_Constant_Padding/1                  0.251 ms        0.251 ms         2779
        +OpenCV_Dilate2D_Constant_Padding/1                     0.135 ms        0.135 ms         5176
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +
        \ No newline at end of file diff --git a/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index b7108d4d..b8fa1267 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -7,30 +7,89 @@ tr:nth-child(even){background:#fafafa} details{border:1px solid #ccc;border-radius:.4rem;padding:.6rem} summary{font-weight:600;cursor:pointer} +.err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

        imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-05-26 22:41:02 UTC

        +

        imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-06-01 10:22:14 UTC

        AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        - - - - - - - - - - - - - - - - - - - - - - -
        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/134.834.820
        MLIR_Conv2D/1122.0122.06
        Buddy_Conv2D/13.93.9179
        Buddy_Corr2D_Constant_Padding/17.97.989
        OpenCV_Filter2D_Constant_Padding/15.95.9118
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.10.14765
        Buddy_Resize2D_Bilinear_Interpolation/10.30.32629
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.00.0102844
        OpenCV_Resize2D_Bilinear_Interpolation/10.00.048492
        Buddy_Erosion2D_Constant_Padding/10.20.23127
        Buddy_Dilation2D_Constant_Padding/10.20.23155
        Buddy_Opening2D_Constant_Padding/10.30.32000
        Buddy_Closing2D_Constant_Padding/10.40.42036
        Buddy_TopHat2D_Constant_Padding/10.90.9751
        Buddy_BottomHat2D_Constant_Padding/10.90.9755
        OpenCV_Erode2D_Constant_Padding/10.10.15001
        OpenCV_Opening2D_Constant_Padding/10.20.23193
        OpenCV_Closing2D_Constant_Padding/10.20.23193
        OpenCV_TopHat2D_Constant_Padding/10.30.32734
        OpenCV_BottomHat2D_Constant_Padding/10.30.32725
        OpenCV_MorphGrad2D_Constant_Padding/10.30.32723
        OpenCV_Dilate2D_Constant_Padding/10.10.15067
        \ No newline at end of file + + + + + + + + + + + + + + + + + + + + + + +
        NameTime (ms)CPU (ms)Iterations
        Eigen_Convolve2D/134.334.320
        MLIR_Conv2D/11191196
        Buddy_Conv2D/13.983.98176
        Buddy_Corr2D_Constant_Padding/17.87.890
        OpenCV_Filter2D_Constant_Padding/15.895.89119
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,830
        Buddy_Resize2D_Bilinear_Interpolation/10.2610.2612,690
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006660.00666105,110
        OpenCV_Resize2D_Bilinear_Interpolation/10.01420.014249,449
        Buddy_Erosion2D_Constant_Padding/10.2140.2143,196
        Buddy_Dilation2D_Constant_Padding/10.2140.2143,263
        Buddy_Opening2D_Constant_Padding/10.3130.3132,208
        Buddy_Closing2D_Constant_Padding/10.3270.3272,187
        Buddy_TopHat2D_Constant_Padding/10.8030.803835
        Buddy_BottomHat2D_Constant_Padding/10.7980.798832
        OpenCV_Erode2D_Constant_Padding/10.1380.1385,076
        OpenCV_Opening2D_Constant_Padding/10.2290.2293,051
        OpenCV_Closing2D_Constant_Padding/10.230.233,037
        OpenCV_TopHat2D_Constant_Padding/10.2630.2632,671
        OpenCV_BottomHat2D_Constant_Padding/10.2630.2632,661
        OpenCV_MorphGrad2D_Constant_Padding/10.2570.2572,719
        OpenCV_Dilate2D_Constant_Padding/10.1360.1365,132
        +
        Console output +
        2025-06-01T10:09:04+00:00
        +Running ./bin/image-processing-benchmark
        +Run on (24 X 5100 MHz CPU s)
        +CPU Caches:
        +  L1 Data 48 KiB (x12)
        +  L1 Instruction 32 KiB (x12)
        +  L2 Unified 1280 KiB (x12)
        +  L3 Unified 30720 KiB (x1)
        +Load Average: 1.00, 1.14, 1.88
        +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        +--------------------------------------------------------------------------------------------
        +Benchmark                                                  Time             CPU   Iterations
        +--------------------------------------------------------------------------------------------
        +Eigen_Convolve2D/1                                      34.3 ms         34.3 ms           20
        +MLIR_Conv2D/1                                            119 ms          119 ms            6
        +Buddy_Conv2D/1                                          3.98 ms         3.98 ms          176
        +Buddy_Corr2D_Constant_Padding/1                         7.80 ms         7.80 ms           90
        +OpenCV_Filter2D_Constant_Padding/1                      5.89 ms         5.89 ms          119
        +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4830
        +Buddy_Resize2D_Bilinear_Interpolation/1                0.261 ms        0.261 ms         2690
        +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       105110
        +OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49449
        +Buddy_Erosion2D_Constant_Padding/1                     0.214 ms        0.214 ms         3196
        +Buddy_Dilation2D_Constant_Padding/1                    0.214 ms        0.214 ms         3263
        +Buddy_Opening2D_Constant_Padding/1                     0.313 ms        0.313 ms         2208
        +Buddy_Closing2D_Constant_Padding/1                     0.327 ms        0.327 ms         2187
        +Buddy_TopHat2D_Constant_Padding/1                      0.803 ms        0.803 ms          835
        +Buddy_BottomHat2D_Constant_Padding/1                   0.798 ms        0.798 ms          832
        +OpenCV_Erode2D_Constant_Padding/1                      0.138 ms        0.138 ms         5076
        +OpenCV_Opening2D_Constant_Padding/1                    0.229 ms        0.229 ms         3051
        +OpenCV_Closing2D_Constant_Padding/1                    0.230 ms        0.230 ms         3037
        +OpenCV_TopHat2D_Constant_Padding/1                     0.263 ms        0.263 ms         2671
        +OpenCV_BottomHat2D_Constant_Padding/1                  0.263 ms        0.263 ms         2661
        +OpenCV_MorphGrad2D_Constant_Padding/1                  0.257 ms        0.257 ms         2719
        +OpenCV_Dilate2D_Constant_Padding/1                     0.136 ms        0.136 ms         5132
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +
        \ No newline at end of file diff --git a/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 06953992..d968de74 100644 --- a/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -7,30 +7,89 @@ tr:nth-child(even){background:#fafafa} details{border:1px solid #ccc;border-radius:.4rem;padding:.6rem} summary{font-weight:600;cursor:pointer} +.err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

        imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:41:02 UTC

        +

        imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-06-01 10:22:14 UTC

        SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        - - - - - - - - - - - - - - - - - - - - - - -
        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/15.05.0139
        MLIR_Conv2D/17.47.494
        Buddy_Conv2D/10.70.71001
        Buddy_Corr2D_Constant_Padding/11.11.1645
        OpenCV_Filter2D_Constant_Padding/11.91.9367
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.10.14717
        Buddy_Resize2D_Bilinear_Interpolation/10.30.32633
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.00.0103033
        OpenCV_Resize2D_Bilinear_Interpolation/10.00.048825
        Buddy_Erosion2D_Constant_Padding/10.20.23139
        Buddy_Dilation2D_Constant_Padding/10.20.23065
        Buddy_Opening2D_Constant_Padding/10.40.41920
        Buddy_Closing2D_Constant_Padding/10.40.41938
        Buddy_TopHat2D_Constant_Padding/10.90.9725
        Buddy_BottomHat2D_Constant_Padding/10.90.9739
        OpenCV_Erode2D_Constant_Padding/10.20.24653
        OpenCV_Opening2D_Constant_Padding/10.20.23211
        OpenCV_Closing2D_Constant_Padding/10.20.23106
        OpenCV_TopHat2D_Constant_Padding/10.30.32694
        OpenCV_BottomHat2D_Constant_Padding/10.30.32735
        OpenCV_MorphGrad2D_Constant_Padding/10.30.32736
        OpenCV_Dilate2D_Constant_Padding/10.10.14980
        \ No newline at end of file + + + + + + + + + + + + + + + + + + + + + + +
        NameTime (ms)CPU (ms)Iterations
        Eigen_Convolve2D/14.744.74148
        MLIR_Conv2D/17.27.297
        Buddy_Conv2D/10.7050.705994
        Buddy_Corr2D_Constant_Padding/11.071.07652
        OpenCV_Filter2D_Constant_Padding/11.861.86376
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,854
        Buddy_Resize2D_Bilinear_Interpolation/10.260.262,692
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006660.00666105,153
        OpenCV_Resize2D_Bilinear_Interpolation/10.0140.01449,887
        Buddy_Erosion2D_Constant_Padding/10.2160.2163,272
        Buddy_Dilation2D_Constant_Padding/10.2150.2153,249
        Buddy_Opening2D_Constant_Padding/10.3110.3112,259
        Buddy_Closing2D_Constant_Padding/10.3070.3072,229
        Buddy_TopHat2D_Constant_Padding/10.7770.777858
        Buddy_BottomHat2D_Constant_Padding/10.7670.767831
        OpenCV_Erode2D_Constant_Padding/10.1360.1365,114
        OpenCV_Opening2D_Constant_Padding/10.2230.2233,131
        OpenCV_Closing2D_Constant_Padding/10.2220.2223,149
        OpenCV_TopHat2D_Constant_Padding/10.2550.2552,744
        OpenCV_BottomHat2D_Constant_Padding/10.2560.2562,738
        OpenCV_MorphGrad2D_Constant_Padding/10.2480.2482,822
        OpenCV_Dilate2D_Constant_Padding/10.1370.1375,112
        +
        Console output +
        2025-06-01T10:03:44+00:00
        +Running ./bin/image-processing-benchmark
        +Run on (24 X 5100 MHz CPU s)
        +CPU Caches:
        +  L1 Data 48 KiB (x12)
        +  L1 Instruction 32 KiB (x12)
        +  L2 Unified 1280 KiB (x12)
        +  L3 Unified 30720 KiB (x1)
        +Load Average: 1.01, 1.35, 2.23
        +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        +--------------------------------------------------------------------------------------------
        +Benchmark                                                  Time             CPU   Iterations
        +--------------------------------------------------------------------------------------------
        +Eigen_Convolve2D/1                                      4.74 ms         4.74 ms          148
        +MLIR_Conv2D/1                                           7.20 ms         7.20 ms           97
        +Buddy_Conv2D/1                                         0.705 ms        0.705 ms          994
        +Buddy_Corr2D_Constant_Padding/1                         1.07 ms         1.07 ms          652
        +OpenCV_Filter2D_Constant_Padding/1                      1.86 ms         1.86 ms          376
        +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4854
        +Buddy_Resize2D_Bilinear_Interpolation/1                0.260 ms        0.260 ms         2692
        +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       105153
        +OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49887
        +Buddy_Erosion2D_Constant_Padding/1                     0.216 ms        0.216 ms         3272
        +Buddy_Dilation2D_Constant_Padding/1                    0.215 ms        0.215 ms         3249
        +Buddy_Opening2D_Constant_Padding/1                     0.311 ms        0.311 ms         2259
        +Buddy_Closing2D_Constant_Padding/1                     0.307 ms        0.307 ms         2229
        +Buddy_TopHat2D_Constant_Padding/1                      0.777 ms        0.777 ms          858
        +Buddy_BottomHat2D_Constant_Padding/1                   0.767 ms        0.767 ms          831
        +OpenCV_Erode2D_Constant_Padding/1                      0.136 ms        0.136 ms         5114
        +OpenCV_Opening2D_Constant_Padding/1                    0.223 ms        0.223 ms         3131
        +OpenCV_Closing2D_Constant_Padding/1                    0.222 ms        0.222 ms         3149
        +OpenCV_TopHat2D_Constant_Padding/1                     0.255 ms        0.255 ms         2744
        +OpenCV_BottomHat2D_Constant_Padding/1                  0.256 ms        0.256 ms         2738
        +OpenCV_MorphGrad2D_Constant_Padding/1                  0.248 ms        0.248 ms         2822
        +OpenCV_Dilate2D_Constant_Padding/1                     0.137 ms        0.137 ms         5112
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +
        \ No newline at end of file diff --git a/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 85b5d8a3..1fd47c10 100644 --- a/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -7,30 +7,89 @@ tr:nth-child(even){background:#fafafa} details{border:1px solid #ccc;border-radius:.4rem;padding:.6rem} summary{font-weight:600;cursor:pointer} +.err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

        imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-05-26 22:41:02 UTC

        +

        imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-06-01 10:22:14 UTC

        SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        - - - - - - - - - - - - - - - - - - - - - - -
        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/15.05.0140
        MLIR_Conv2D/17.47.495
        Buddy_Conv2D/10.70.7962
        Buddy_Corr2D_Constant_Padding/11.11.1652
        OpenCV_Filter2D_Constant_Padding/11.91.9366
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.10.14698
        Buddy_Resize2D_Bilinear_Interpolation/10.30.32627
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.00.0102351
        OpenCV_Resize2D_Bilinear_Interpolation/10.00.048814
        Buddy_Erosion2D_Constant_Padding/10.20.23146
        Buddy_Dilation2D_Constant_Padding/10.20.23150
        Buddy_Opening2D_Constant_Padding/10.30.32128
        Buddy_Closing2D_Constant_Padding/10.30.32167
        Buddy_TopHat2D_Constant_Padding/10.90.9747
        Buddy_BottomHat2D_Constant_Padding/10.90.9769
        OpenCV_Erode2D_Constant_Padding/10.10.15009
        OpenCV_Opening2D_Constant_Padding/10.20.23227
        OpenCV_Closing2D_Constant_Padding/10.20.23120
        OpenCV_TopHat2D_Constant_Padding/10.30.32745
        OpenCV_BottomHat2D_Constant_Padding/10.30.32765
        OpenCV_MorphGrad2D_Constant_Padding/10.30.32733
        OpenCV_Dilate2D_Constant_Padding/10.10.14956
        \ No newline at end of file + + + + + + + + + + + + + + + + + + + + + + +
        NameTime (ms)CPU (ms)Iterations
        Eigen_Convolve2D/14.884.88144
        MLIR_Conv2D/17.217.2197
        Buddy_Conv2D/10.7070.707988
        Buddy_Corr2D_Constant_Padding/11.051.05668
        OpenCV_Filter2D_Constant_Padding/11.861.86376
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,847
        Buddy_Resize2D_Bilinear_Interpolation/10.2610.2612,676
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006660.00666104,914
        OpenCV_Resize2D_Bilinear_Interpolation/10.0140.01449,862
        Buddy_Erosion2D_Constant_Padding/10.2130.2133,188
        Buddy_Dilation2D_Constant_Padding/10.2160.2163,259
        Buddy_Opening2D_Constant_Padding/10.3170.3172,184
        Buddy_Closing2D_Constant_Padding/10.3140.3142,136
        Buddy_TopHat2D_Constant_Padding/10.7860.786814
        Buddy_BottomHat2D_Constant_Padding/10.7990.799847
        OpenCV_Erode2D_Constant_Padding/10.1390.1395,040
        OpenCV_Opening2D_Constant_Padding/10.2210.2213,163
        OpenCV_Closing2D_Constant_Padding/10.2190.2193,197
        OpenCV_TopHat2D_Constant_Padding/10.2550.2552,741
        OpenCV_BottomHat2D_Constant_Padding/10.2560.2562,735
        OpenCV_MorphGrad2D_Constant_Padding/10.2480.2482,817
        OpenCV_Dilate2D_Constant_Padding/10.1370.1375,127
        +
        Console output +
        2025-06-01T10:04:08+00:00
        +Running ./bin/image-processing-benchmark
        +Run on (24 X 5100 MHz CPU s)
        +CPU Caches:
        +  L1 Data 48 KiB (x12)
        +  L1 Instruction 32 KiB (x12)
        +  L2 Unified 1280 KiB (x12)
        +  L3 Unified 30720 KiB (x1)
        +Load Average: 1.01, 1.32, 2.20
        +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        +--------------------------------------------------------------------------------------------
        +Benchmark                                                  Time             CPU   Iterations
        +--------------------------------------------------------------------------------------------
        +Eigen_Convolve2D/1                                      4.88 ms         4.88 ms          144
        +MLIR_Conv2D/1                                           7.21 ms         7.21 ms           97
        +Buddy_Conv2D/1                                         0.707 ms        0.707 ms          988
        +Buddy_Corr2D_Constant_Padding/1                         1.05 ms         1.05 ms          668
        +OpenCV_Filter2D_Constant_Padding/1                      1.86 ms         1.86 ms          376
        +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4847
        +Buddy_Resize2D_Bilinear_Interpolation/1                0.261 ms        0.261 ms         2676
        +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       104914
        +OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49862
        +Buddy_Erosion2D_Constant_Padding/1                     0.213 ms        0.213 ms         3188
        +Buddy_Dilation2D_Constant_Padding/1                    0.216 ms        0.216 ms         3259
        +Buddy_Opening2D_Constant_Padding/1                     0.317 ms        0.317 ms         2184
        +Buddy_Closing2D_Constant_Padding/1                     0.314 ms        0.314 ms         2136
        +Buddy_TopHat2D_Constant_Padding/1                      0.786 ms        0.786 ms          814
        +Buddy_BottomHat2D_Constant_Padding/1                   0.799 ms        0.799 ms          847
        +OpenCV_Erode2D_Constant_Padding/1                      0.139 ms        0.139 ms         5040
        +OpenCV_Opening2D_Constant_Padding/1                    0.221 ms        0.221 ms         3163
        +OpenCV_Closing2D_Constant_Padding/1                    0.219 ms        0.219 ms         3197
        +OpenCV_TopHat2D_Constant_Padding/1                     0.255 ms        0.255 ms         2741
        +OpenCV_BottomHat2D_Constant_Padding/1                  0.256 ms        0.256 ms         2735
        +OpenCV_MorphGrad2D_Constant_Padding/1                  0.248 ms        0.248 ms         2817
        +OpenCV_Dilate2D_Constant_Padding/1                     0.137 ms        0.137 ms         5127
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +
        \ No newline at end of file diff --git a/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index eea020f2..1b283099 100644 --- a/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -7,30 +7,89 @@ tr:nth-child(even){background:#fafafa} details{border:1px solid #ccc;border-radius:.4rem;padding:.6rem} summary{font-weight:600;cursor:pointer} +.err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

        imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:41:02 UTC

        +

        imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-06-01 10:22:14 UTC

        SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        - - - - - - - - - - - - - - - - - - - - - - -
        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/111.911.960
        MLIR_Conv2D/129.829.823
        Buddy_Conv2D/12.12.1332
        Buddy_Corr2D_Constant_Padding/11.81.8390
        OpenCV_Filter2D_Constant_Padding/12.72.7256
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.10.14768
        Buddy_Resize2D_Bilinear_Interpolation/10.30.32629
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.00.0103262
        OpenCV_Resize2D_Bilinear_Interpolation/10.00.048878
        Buddy_Erosion2D_Constant_Padding/10.20.23105
        Buddy_Dilation2D_Constant_Padding/10.20.23120
        Buddy_Opening2D_Constant_Padding/10.40.41986
        Buddy_Closing2D_Constant_Padding/10.30.31944
        Buddy_TopHat2D_Constant_Padding/10.90.9747
        Buddy_BottomHat2D_Constant_Padding/10.90.9739
        OpenCV_Erode2D_Constant_Padding/10.10.14963
        OpenCV_Opening2D_Constant_Padding/10.20.23142
        OpenCV_Closing2D_Constant_Padding/10.20.23089
        OpenCV_TopHat2D_Constant_Padding/10.30.32689
        OpenCV_BottomHat2D_Constant_Padding/10.30.32693
        OpenCV_MorphGrad2D_Constant_Padding/10.30.32664
        OpenCV_Dilate2D_Constant_Padding/10.10.15045
        \ No newline at end of file + + + + + + + + + + + + + + + + + + + + + + +
        NameTime (ms)CPU (ms)Iterations
        Eigen_Convolve2D/111.511.560
        MLIR_Conv2D/129.129.124
        Buddy_Conv2D/12.042.04343
        Buddy_Corr2D_Constant_Padding/11.741.74400
        OpenCV_Filter2D_Constant_Padding/12.682.68261
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,858
        Buddy_Resize2D_Bilinear_Interpolation/10.2610.2612,687
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006670.00667104,992
        OpenCV_Resize2D_Bilinear_Interpolation/10.0140.01449,870
        Buddy_Erosion2D_Constant_Padding/10.2160.2163,259
        Buddy_Dilation2D_Constant_Padding/10.2160.2163,239
        Buddy_Opening2D_Constant_Padding/10.320.322,223
        Buddy_Closing2D_Constant_Padding/10.3080.3082,211
        Buddy_TopHat2D_Constant_Padding/10.7910.791836
        Buddy_BottomHat2D_Constant_Padding/10.8060.805841
        OpenCV_Erode2D_Constant_Padding/10.1380.1385,076
        OpenCV_Opening2D_Constant_Padding/10.2270.2273,086
        OpenCV_Closing2D_Constant_Padding/10.2250.2253,114
        OpenCV_TopHat2D_Constant_Padding/10.2640.2642,653
        OpenCV_BottomHat2D_Constant_Padding/10.2620.2622,674
        OpenCV_MorphGrad2D_Constant_Padding/10.2550.2552,741
        OpenCV_Dilate2D_Constant_Padding/10.1380.1385,067
        +
        Console output +
        2025-06-01T10:04:31+00:00
        +Running ./bin/image-processing-benchmark
        +Run on (24 X 5100 MHz CPU s)
        +CPU Caches:
        +  L1 Data 48 KiB (x12)
        +  L1 Instruction 32 KiB (x12)
        +  L2 Unified 1280 KiB (x12)
        +  L3 Unified 30720 KiB (x1)
        +Load Average: 1.00, 1.30, 2.17
        +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        +--------------------------------------------------------------------------------------------
        +Benchmark                                                  Time             CPU   Iterations
        +--------------------------------------------------------------------------------------------
        +Eigen_Convolve2D/1                                      11.5 ms         11.5 ms           60
        +MLIR_Conv2D/1                                           29.1 ms         29.1 ms           24
        +Buddy_Conv2D/1                                          2.04 ms         2.04 ms          343
        +Buddy_Corr2D_Constant_Padding/1                         1.74 ms         1.74 ms          400
        +OpenCV_Filter2D_Constant_Padding/1                      2.68 ms         2.68 ms          261
        +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4858
        +Buddy_Resize2D_Bilinear_Interpolation/1                0.261 ms        0.261 ms         2687
        +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       104992
        +OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49870
        +Buddy_Erosion2D_Constant_Padding/1                     0.216 ms        0.216 ms         3259
        +Buddy_Dilation2D_Constant_Padding/1                    0.216 ms        0.216 ms         3239
        +Buddy_Opening2D_Constant_Padding/1                     0.320 ms        0.320 ms         2223
        +Buddy_Closing2D_Constant_Padding/1                     0.308 ms        0.308 ms         2211
        +Buddy_TopHat2D_Constant_Padding/1                      0.791 ms        0.791 ms          836
        +Buddy_BottomHat2D_Constant_Padding/1                   0.806 ms        0.805 ms          841
        +OpenCV_Erode2D_Constant_Padding/1                      0.138 ms        0.138 ms         5076
        +OpenCV_Opening2D_Constant_Padding/1                    0.227 ms        0.227 ms         3086
        +OpenCV_Closing2D_Constant_Padding/1                    0.225 ms        0.225 ms         3114
        +OpenCV_TopHat2D_Constant_Padding/1                     0.264 ms        0.264 ms         2653
        +OpenCV_BottomHat2D_Constant_Padding/1                  0.262 ms        0.262 ms         2674
        +OpenCV_MorphGrad2D_Constant_Padding/1                  0.255 ms        0.255 ms         2741
        +OpenCV_Dilate2D_Constant_Padding/1                     0.138 ms        0.138 ms         5067
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +
        \ No newline at end of file diff --git a/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 8e4999cd..12cc3d84 100644 --- a/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -7,30 +7,89 @@ tr:nth-child(even){background:#fafafa} details{border:1px solid #ccc;border-radius:.4rem;padding:.6rem} summary{font-weight:600;cursor:pointer} +.err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

        imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-05-26 22:41:02 UTC

        +

        imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-06-01 10:22:14 UTC

        SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        - - - - - - - - - - - - - - - - - - - - - - -
        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/111.611.660
        MLIR_Conv2D/129.929.923
        Buddy_Conv2D/12.12.1327
        Buddy_Corr2D_Constant_Padding/11.81.8389
        OpenCV_Filter2D_Constant_Padding/12.72.7255
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.10.14755
        Buddy_Resize2D_Bilinear_Interpolation/10.30.32633
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.00.0102709
        OpenCV_Resize2D_Bilinear_Interpolation/10.00.048854
        Buddy_Erosion2D_Constant_Padding/10.20.22456
        Buddy_Dilation2D_Constant_Padding/10.20.23097
        Buddy_Opening2D_Constant_Padding/10.40.42015
        Buddy_Closing2D_Constant_Padding/10.30.32002
        Buddy_TopHat2D_Constant_Padding/10.90.9721
        Buddy_BottomHat2D_Constant_Padding/10.90.9727
        OpenCV_Erode2D_Constant_Padding/10.10.14988
        OpenCV_Opening2D_Constant_Padding/10.20.23142
        OpenCV_Closing2D_Constant_Padding/10.20.23180
        OpenCV_TopHat2D_Constant_Padding/10.30.32711
        OpenCV_BottomHat2D_Constant_Padding/10.30.32642
        OpenCV_MorphGrad2D_Constant_Padding/10.30.32712
        OpenCV_Dilate2D_Constant_Padding/10.10.15064
        \ No newline at end of file + + + + + + + + + + + + + + + + + + + + + + +
        NameTime (ms)CPU (ms)Iterations
        Eigen_Convolve2D/111.611.661
        MLIR_Conv2D/1292924
        Buddy_Conv2D/12.082.08337
        Buddy_Corr2D_Constant_Padding/11.751.75399
        OpenCV_Filter2D_Constant_Padding/12.682.68261
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,856
        Buddy_Resize2D_Bilinear_Interpolation/10.2610.2612,688
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006690.00669104,682
        OpenCV_Resize2D_Bilinear_Interpolation/10.01410.014149,744
        Buddy_Erosion2D_Constant_Padding/10.2190.2193,240
        Buddy_Dilation2D_Constant_Padding/10.2150.2153,213
        Buddy_Opening2D_Constant_Padding/10.3080.3082,240
        Buddy_Closing2D_Constant_Padding/10.3090.3092,269
        Buddy_TopHat2D_Constant_Padding/10.820.82841
        Buddy_BottomHat2D_Constant_Padding/10.80.8846
        OpenCV_Erode2D_Constant_Padding/10.1380.1385,072
        OpenCV_Opening2D_Constant_Padding/10.2230.2233,139
        OpenCV_Closing2D_Constant_Padding/10.2280.2283,074
        OpenCV_TopHat2D_Constant_Padding/10.2610.2612,680
        OpenCV_BottomHat2D_Constant_Padding/10.2620.2622,676
        OpenCV_MorphGrad2D_Constant_Padding/10.2540.2542,755
        OpenCV_Dilate2D_Constant_Padding/10.1350.1355,189
        +
        Console output +
        2025-06-01T10:04:55+00:00
        +Running ./bin/image-processing-benchmark
        +Run on (24 X 5100 MHz CPU s)
        +CPU Caches:
        +  L1 Data 48 KiB (x12)
        +  L1 Instruction 32 KiB (x12)
        +  L2 Unified 1280 KiB (x12)
        +  L3 Unified 30720 KiB (x1)
        +Load Average: 1.06, 1.29, 2.15
        +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        +--------------------------------------------------------------------------------------------
        +Benchmark                                                  Time             CPU   Iterations
        +--------------------------------------------------------------------------------------------
        +Eigen_Convolve2D/1                                      11.6 ms         11.6 ms           61
        +MLIR_Conv2D/1                                           29.0 ms         29.0 ms           24
        +Buddy_Conv2D/1                                          2.08 ms         2.08 ms          337
        +Buddy_Corr2D_Constant_Padding/1                         1.75 ms         1.75 ms          399
        +OpenCV_Filter2D_Constant_Padding/1                      2.68 ms         2.68 ms          261
        +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4856
        +Buddy_Resize2D_Bilinear_Interpolation/1                0.261 ms        0.261 ms         2688
        +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       104682
        +OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49744
        +Buddy_Erosion2D_Constant_Padding/1                     0.219 ms        0.219 ms         3240
        +Buddy_Dilation2D_Constant_Padding/1                    0.215 ms        0.215 ms         3213
        +Buddy_Opening2D_Constant_Padding/1                     0.308 ms        0.308 ms         2240
        +Buddy_Closing2D_Constant_Padding/1                     0.309 ms        0.309 ms         2269
        +Buddy_TopHat2D_Constant_Padding/1                      0.820 ms        0.820 ms          841
        +Buddy_BottomHat2D_Constant_Padding/1                   0.800 ms        0.800 ms          846
        +OpenCV_Erode2D_Constant_Padding/1                      0.138 ms        0.138 ms         5072
        +OpenCV_Opening2D_Constant_Padding/1                    0.223 ms        0.223 ms         3139
        +OpenCV_Closing2D_Constant_Padding/1                    0.228 ms        0.228 ms         3074
        +OpenCV_TopHat2D_Constant_Padding/1                     0.261 ms        0.261 ms         2680
        +OpenCV_BottomHat2D_Constant_Padding/1                  0.262 ms        0.262 ms         2676
        +OpenCV_MorphGrad2D_Constant_Padding/1                  0.254 ms        0.254 ms         2755
        +OpenCV_Dilate2D_Constant_Padding/1                     0.135 ms        0.135 ms         5189
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +
        \ No newline at end of file diff --git a/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 84b91387..a1c47c85 100644 --- a/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -7,30 +7,89 @@ tr:nth-child(even){background:#fafafa} details{border:1px solid #ccc;border-radius:.4rem;padding:.6rem} summary{font-weight:600;cursor:pointer} +.err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

        imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:41:02 UTC

        +

        imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-06-01 10:22:14 UTC

        SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        - - - - - - - - - - - - - - - - - - - - - - -
        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/15.05.0140
        MLIR_Conv2D/17.47.495
        Buddy_Conv2D/10.40.41585
        Buddy_Corr2D_Constant_Padding/10.80.8879
        OpenCV_Filter2D_Constant_Padding/11.31.3549
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.10.14708
        Buddy_Resize2D_Bilinear_Interpolation/10.30.32618
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.00.0102964
        OpenCV_Resize2D_Bilinear_Interpolation/10.00.048907
        Buddy_Erosion2D_Constant_Padding/10.20.23019
        Buddy_Dilation2D_Constant_Padding/10.20.23004
        Buddy_Opening2D_Constant_Padding/10.30.32064
        Buddy_Closing2D_Constant_Padding/10.30.32044
        Buddy_TopHat2D_Constant_Padding/10.90.9782
        Buddy_BottomHat2D_Constant_Padding/10.90.9791
        OpenCV_Erode2D_Constant_Padding/10.10.15012
        OpenCV_Opening2D_Constant_Padding/10.20.23259
        OpenCV_Closing2D_Constant_Padding/10.20.23254
        OpenCV_TopHat2D_Constant_Padding/10.20.22816
        OpenCV_BottomHat2D_Constant_Padding/10.20.22807
        OpenCV_MorphGrad2D_Constant_Padding/10.30.32604
        OpenCV_Dilate2D_Constant_Padding/10.10.14985
        \ No newline at end of file + + + + + + + + + + + + + + + + + + + + + + +
        NameTime (ms)CPU (ms)Iterations
        Eigen_Convolve2D/15.035.03139
        MLIR_Conv2D/17.387.3895
        Buddy_Conv2D/10.5220.5211,363
        Buddy_Corr2D_Constant_Padding/10.8140.814865
        OpenCV_Filter2D_Constant_Padding/11.281.28548
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1460.1464,815
        Buddy_Resize2D_Bilinear_Interpolation/10.2670.2672,628
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006810.00681102,923
        OpenCV_Resize2D_Bilinear_Interpolation/10.01430.014348,919
        Buddy_Erosion2D_Constant_Padding/10.2220.2223,130
        Buddy_Dilation2D_Constant_Padding/10.220.223,158
        Buddy_Opening2D_Constant_Padding/10.3340.3342,177
        Buddy_Closing2D_Constant_Padding/10.3380.3382,114
        Buddy_TopHat2D_Constant_Padding/10.8930.893734
        Buddy_BottomHat2D_Constant_Padding/10.8990.899761
        OpenCV_Erode2D_Constant_Padding/10.1390.1395,020
        OpenCV_Opening2D_Constant_Padding/10.2170.2173,210
        OpenCV_Closing2D_Constant_Padding/10.220.223,180
        OpenCV_TopHat2D_Constant_Padding/10.2550.2552,639
        OpenCV_BottomHat2D_Constant_Padding/10.2560.2562,732
        OpenCV_MorphGrad2D_Constant_Padding/10.2470.2472,827
        OpenCV_Dilate2D_Constant_Padding/10.1360.1365,123
        +
        Console output +
        2025-06-01T09:59:45+00:00
        +Running ./bin/image-processing-benchmark
        +Run on (24 X 5100 MHz CPU s)
        +CPU Caches:
        +  L1 Data 48 KiB (x12)
        +  L1 Instruction 32 KiB (x12)
        +  L2 Unified 1280 KiB (x12)
        +  L3 Unified 30720 KiB (x1)
        +Load Average: 1.45, 1.75, 2.60
        +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        +--------------------------------------------------------------------------------------------
        +Benchmark                                                  Time             CPU   Iterations
        +--------------------------------------------------------------------------------------------
        +Eigen_Convolve2D/1                                      5.03 ms         5.03 ms          139
        +MLIR_Conv2D/1                                           7.38 ms         7.38 ms           95
        +Buddy_Conv2D/1                                         0.522 ms        0.521 ms         1363
        +Buddy_Corr2D_Constant_Padding/1                        0.814 ms        0.814 ms          865
        +OpenCV_Filter2D_Constant_Padding/1                      1.28 ms         1.28 ms          548
        +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.146 ms        0.146 ms         4815
        +Buddy_Resize2D_Bilinear_Interpolation/1                0.267 ms        0.267 ms         2628
        +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       102923
        +OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        48919
        +Buddy_Erosion2D_Constant_Padding/1                     0.222 ms        0.222 ms         3130
        +Buddy_Dilation2D_Constant_Padding/1                    0.220 ms        0.220 ms         3158
        +Buddy_Opening2D_Constant_Padding/1                     0.334 ms        0.334 ms         2177
        +Buddy_Closing2D_Constant_Padding/1                     0.338 ms        0.338 ms         2114
        +Buddy_TopHat2D_Constant_Padding/1                      0.893 ms        0.893 ms          734
        +Buddy_BottomHat2D_Constant_Padding/1                   0.899 ms        0.899 ms          761
        +OpenCV_Erode2D_Constant_Padding/1                      0.139 ms        0.139 ms         5020
        +OpenCV_Opening2D_Constant_Padding/1                    0.217 ms        0.217 ms         3210
        +OpenCV_Closing2D_Constant_Padding/1                    0.220 ms        0.220 ms         3180
        +OpenCV_TopHat2D_Constant_Padding/1                     0.255 ms        0.255 ms         2639
        +OpenCV_BottomHat2D_Constant_Padding/1                  0.256 ms        0.256 ms         2732
        +OpenCV_MorphGrad2D_Constant_Padding/1                  0.247 ms        0.247 ms         2827
        +OpenCV_Dilate2D_Constant_Padding/1                     0.136 ms        0.136 ms         5123
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +
        \ No newline at end of file diff --git a/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 3eca17ee..06a3a1cc 100644 --- a/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -7,30 +7,89 @@ tr:nth-child(even){background:#fafafa} details{border:1px solid #ccc;border-radius:.4rem;padding:.6rem} summary{font-weight:600;cursor:pointer} +.err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

        imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-05-26 22:41:02 UTC

        +

        imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-06-01 10:22:14 UTC

        SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        - - - - - - - - - - - - - - - - - - - - - - -
        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/14.94.9144
        MLIR_Conv2D/17.47.495
        Buddy_Conv2D/10.40.41587
        Buddy_Corr2D_Constant_Padding/10.80.8850
        OpenCV_Filter2D_Constant_Padding/11.31.3549
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.10.14759
        Buddy_Resize2D_Bilinear_Interpolation/10.30.32628
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.00.0102952
        OpenCV_Resize2D_Bilinear_Interpolation/10.00.048945
        Buddy_Erosion2D_Constant_Padding/10.20.23245
        Buddy_Dilation2D_Constant_Padding/10.20.22941
        Buddy_Opening2D_Constant_Padding/10.40.41920
        Buddy_Closing2D_Constant_Padding/10.40.41942
        Buddy_TopHat2D_Constant_Padding/11.01.0686
        Buddy_BottomHat2D_Constant_Padding/11.01.0687
        OpenCV_Erode2D_Constant_Padding/10.10.14813
        OpenCV_Opening2D_Constant_Padding/10.20.23185
        OpenCV_Closing2D_Constant_Padding/10.20.23185
        OpenCV_TopHat2D_Constant_Padding/10.30.32747
        OpenCV_BottomHat2D_Constant_Padding/10.30.32760
        OpenCV_MorphGrad2D_Constant_Padding/10.30.32735
        OpenCV_Dilate2D_Constant_Padding/10.10.14913
        \ No newline at end of file + + + + + + + + + + + + + + + + + + + + + + +
        NameTime (ms)CPU (ms)Iterations
        Eigen_Convolve2D/14.894.89143
        MLIR_Conv2D/17.197.1997
        Buddy_Conv2D/10.5240.5241,337
        Buddy_Corr2D_Constant_Padding/10.7920.792882
        OpenCV_Filter2D_Constant_Padding/11.251.25561
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,818
        Buddy_Resize2D_Bilinear_Interpolation/10.2610.262,683
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006690.00669104,687
        OpenCV_Resize2D_Bilinear_Interpolation/10.0140.01449,857
        Buddy_Erosion2D_Constant_Padding/10.2210.2213,251
        Buddy_Dilation2D_Constant_Padding/10.2140.2143,237
        Buddy_Opening2D_Constant_Padding/10.3230.3232,241
        Buddy_Closing2D_Constant_Padding/10.3080.3082,271
        Buddy_TopHat2D_Constant_Padding/10.8050.805841
        Buddy_BottomHat2D_Constant_Padding/10.8090.809846
        OpenCV_Erode2D_Constant_Padding/10.1360.1365,105
        OpenCV_Opening2D_Constant_Padding/10.2170.2173,219
        OpenCV_Closing2D_Constant_Padding/10.2170.2173,216
        OpenCV_TopHat2D_Constant_Padding/10.2580.2582,710
        OpenCV_BottomHat2D_Constant_Padding/10.2560.2562,740
        OpenCV_MorphGrad2D_Constant_Padding/10.250.252,803
        OpenCV_Dilate2D_Constant_Padding/10.1370.1375,105
        +
        Console output +
        2025-06-01T10:00:09+00:00
        +Running ./bin/image-processing-benchmark
        +Run on (24 X 5100 MHz CPU s)
        +CPU Caches:
        +  L1 Data 48 KiB (x12)
        +  L1 Instruction 32 KiB (x12)
        +  L2 Unified 1280 KiB (x12)
        +  L3 Unified 30720 KiB (x1)
        +Load Average: 1.48, 1.73, 2.57
        +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        +--------------------------------------------------------------------------------------------
        +Benchmark                                                  Time             CPU   Iterations
        +--------------------------------------------------------------------------------------------
        +Eigen_Convolve2D/1                                      4.89 ms         4.89 ms          143
        +MLIR_Conv2D/1                                           7.19 ms         7.19 ms           97
        +Buddy_Conv2D/1                                         0.524 ms        0.524 ms         1337
        +Buddy_Corr2D_Constant_Padding/1                        0.792 ms        0.792 ms          882
        +OpenCV_Filter2D_Constant_Padding/1                      1.25 ms         1.25 ms          561
        +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4818
        +Buddy_Resize2D_Bilinear_Interpolation/1                0.261 ms        0.260 ms         2683
        +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       104687
        +OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49857
        +Buddy_Erosion2D_Constant_Padding/1                     0.221 ms        0.221 ms         3251
        +Buddy_Dilation2D_Constant_Padding/1                    0.214 ms        0.214 ms         3237
        +Buddy_Opening2D_Constant_Padding/1                     0.323 ms        0.323 ms         2241
        +Buddy_Closing2D_Constant_Padding/1                     0.308 ms        0.308 ms         2271
        +Buddy_TopHat2D_Constant_Padding/1                      0.805 ms        0.805 ms          841
        +Buddy_BottomHat2D_Constant_Padding/1                   0.809 ms        0.809 ms          846
        +OpenCV_Erode2D_Constant_Padding/1                      0.136 ms        0.136 ms         5105
        +OpenCV_Opening2D_Constant_Padding/1                    0.217 ms        0.217 ms         3219
        +OpenCV_Closing2D_Constant_Padding/1                    0.217 ms        0.217 ms         3216
        +OpenCV_TopHat2D_Constant_Padding/1                     0.258 ms        0.258 ms         2710
        +OpenCV_BottomHat2D_Constant_Padding/1                  0.256 ms        0.256 ms         2740
        +OpenCV_MorphGrad2D_Constant_Padding/1                  0.250 ms        0.250 ms         2803
        +OpenCV_Dilate2D_Constant_Padding/1                     0.137 ms        0.137 ms         5105
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +
        \ No newline at end of file diff --git a/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 65782f9c..c1c11478 100644 --- a/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -7,30 +7,89 @@ tr:nth-child(even){background:#fafafa} details{border:1px solid #ccc;border-radius:.4rem;padding:.6rem} summary{font-weight:600;cursor:pointer} +.err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

        imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:41:02 UTC

        +

        imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-06-01 10:22:14 UTC

        SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        - - - - - - - - - - - - - - - - - - - - - - -
        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/14.94.9143
        MLIR_Conv2D/17.47.495
        Buddy_Conv2D/10.50.51300
        Buddy_Corr2D_Constant_Padding/10.80.8860
        OpenCV_Filter2D_Constant_Padding/11.31.3548
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.10.14746
        Buddy_Resize2D_Bilinear_Interpolation/10.30.32630
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.00.0102577
        OpenCV_Resize2D_Bilinear_Interpolation/10.00.048983
        Buddy_Erosion2D_Constant_Padding/10.20.23160
        Buddy_Dilation2D_Constant_Padding/10.20.23155
        Buddy_Opening2D_Constant_Padding/10.40.42114
        Buddy_Closing2D_Constant_Padding/10.40.41704
        Buddy_TopHat2D_Constant_Padding/11.01.0677
        Buddy_BottomHat2D_Constant_Padding/11.01.0684
        OpenCV_Erode2D_Constant_Padding/10.10.15055
        OpenCV_Opening2D_Constant_Padding/10.20.23286
        OpenCV_Closing2D_Constant_Padding/10.20.23307
        OpenCV_TopHat2D_Constant_Padding/10.20.22840
        OpenCV_BottomHat2D_Constant_Padding/10.20.22838
        OpenCV_MorphGrad2D_Constant_Padding/10.30.32741
        OpenCV_Dilate2D_Constant_Padding/10.10.15019
        \ No newline at end of file + + + + + + + + + + + + + + + + + + + + + + +
        NameTime (ms)CPU (ms)Iterations
        Eigen_Convolve2D/14.894.89143
        MLIR_Conv2D/17.197.1997
        Buddy_Conv2D/10.5230.5231,334
        Buddy_Corr2D_Constant_Padding/10.7930.793882
        OpenCV_Filter2D_Constant_Padding/11.251.25561
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,847
        Buddy_Resize2D_Bilinear_Interpolation/10.260.262,679
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006680.00668104,555
        OpenCV_Resize2D_Bilinear_Interpolation/10.0140.01449,894
        Buddy_Erosion2D_Constant_Padding/10.2140.2143,270
        Buddy_Dilation2D_Constant_Padding/10.2140.2143,269
        Buddy_Opening2D_Constant_Padding/10.320.322,113
        Buddy_Closing2D_Constant_Padding/10.3060.3062,219
        Buddy_TopHat2D_Constant_Padding/10.7810.781863
        Buddy_BottomHat2D_Constant_Padding/10.7950.795836
        OpenCV_Erode2D_Constant_Padding/10.1350.1355,175
        OpenCV_Opening2D_Constant_Padding/10.2170.2173,222
        OpenCV_Closing2D_Constant_Padding/10.2220.2223,153
        OpenCV_TopHat2D_Constant_Padding/10.2570.2572,720
        OpenCV_BottomHat2D_Constant_Padding/10.2570.2572,721
        OpenCV_MorphGrad2D_Constant_Padding/10.250.252,805
        OpenCV_Dilate2D_Constant_Padding/10.1370.1375,121
        +
        Console output +
        2025-06-01T10:00:33+00:00
        +Running ./bin/image-processing-benchmark
        +Run on (24 X 5100 MHz CPU s)
        +CPU Caches:
        +  L1 Data 48 KiB (x12)
        +  L1 Instruction 32 KiB (x12)
        +  L2 Unified 1280 KiB (x12)
        +  L3 Unified 30720 KiB (x1)
        +Load Average: 1.32, 1.67, 2.52
        +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        +--------------------------------------------------------------------------------------------
        +Benchmark                                                  Time             CPU   Iterations
        +--------------------------------------------------------------------------------------------
        +Eigen_Convolve2D/1                                      4.89 ms         4.89 ms          143
        +MLIR_Conv2D/1                                           7.19 ms         7.19 ms           97
        +Buddy_Conv2D/1                                         0.523 ms        0.523 ms         1334
        +Buddy_Corr2D_Constant_Padding/1                        0.793 ms        0.793 ms          882
        +OpenCV_Filter2D_Constant_Padding/1                      1.25 ms         1.25 ms          561
        +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4847
        +Buddy_Resize2D_Bilinear_Interpolation/1                0.260 ms        0.260 ms         2679
        +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       104555
        +OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49894
        +Buddy_Erosion2D_Constant_Padding/1                     0.214 ms        0.214 ms         3270
        +Buddy_Dilation2D_Constant_Padding/1                    0.214 ms        0.214 ms         3269
        +Buddy_Opening2D_Constant_Padding/1                     0.320 ms        0.320 ms         2113
        +Buddy_Closing2D_Constant_Padding/1                     0.306 ms        0.306 ms         2219
        +Buddy_TopHat2D_Constant_Padding/1                      0.781 ms        0.781 ms          863
        +Buddy_BottomHat2D_Constant_Padding/1                   0.795 ms        0.795 ms          836
        +OpenCV_Erode2D_Constant_Padding/1                      0.135 ms        0.135 ms         5175
        +OpenCV_Opening2D_Constant_Padding/1                    0.217 ms        0.217 ms         3222
        +OpenCV_Closing2D_Constant_Padding/1                    0.222 ms        0.222 ms         3153
        +OpenCV_TopHat2D_Constant_Padding/1                     0.257 ms        0.257 ms         2720
        +OpenCV_BottomHat2D_Constant_Padding/1                  0.257 ms        0.257 ms         2721
        +OpenCV_MorphGrad2D_Constant_Padding/1                  0.250 ms        0.250 ms         2805
        +OpenCV_Dilate2D_Constant_Padding/1                     0.137 ms        0.137 ms         5121
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +
        \ No newline at end of file diff --git a/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index ef0e3d70..3db0471f 100644 --- a/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -7,30 +7,89 @@ tr:nth-child(even){background:#fafafa} details{border:1px solid #ccc;border-radius:.4rem;padding:.6rem} summary{font-weight:600;cursor:pointer} +.err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

        imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-05-26 22:41:02 UTC

        +

        imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-06-01 10:22:14 UTC

        SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        - - - - - - - - - - - - - - - - - - - - - - -
        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/14.94.9143
        MLIR_Conv2D/17.47.495
        Buddy_Conv2D/10.50.51375
        Buddy_Corr2D_Constant_Padding/10.80.8857
        OpenCV_Filter2D_Constant_Padding/11.31.3548
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.10.14712
        Buddy_Resize2D_Bilinear_Interpolation/10.30.32626
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.00.0103057
        OpenCV_Resize2D_Bilinear_Interpolation/10.00.048615
        Buddy_Erosion2D_Constant_Padding/10.20.23174
        Buddy_Dilation2D_Constant_Padding/10.20.23168
        Buddy_Opening2D_Constant_Padding/10.30.32081
        Buddy_Closing2D_Constant_Padding/10.30.32123
        Buddy_TopHat2D_Constant_Padding/10.90.9787
        Buddy_BottomHat2D_Constant_Padding/10.90.9762
        OpenCV_Erode2D_Constant_Padding/10.10.15037
        OpenCV_Opening2D_Constant_Padding/10.20.23118
        OpenCV_Closing2D_Constant_Padding/10.20.23165
        OpenCV_TopHat2D_Constant_Padding/10.30.32680
        OpenCV_BottomHat2D_Constant_Padding/10.30.32756
        OpenCV_MorphGrad2D_Constant_Padding/10.30.32733
        OpenCV_Dilate2D_Constant_Padding/10.10.14846
        \ No newline at end of file + + + + + + + + + + + + + + + + + + + + + + +
        NameTime (ms)CPU (ms)Iterations
        Eigen_Convolve2D/14.874.87144
        MLIR_Conv2D/17.197.1897
        Buddy_Conv2D/10.5260.5261,322
        Buddy_Corr2D_Constant_Padding/10.7930.793885
        OpenCV_Filter2D_Constant_Padding/11.251.25560
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,865
        Buddy_Resize2D_Bilinear_Interpolation/10.260.262,697
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006660.00666105,155
        OpenCV_Resize2D_Bilinear_Interpolation/10.0140.01449,883
        Buddy_Erosion2D_Constant_Padding/10.2140.2143,259
        Buddy_Dilation2D_Constant_Padding/10.2130.2133,259
        Buddy_Opening2D_Constant_Padding/10.3150.3152,258
        Buddy_Closing2D_Constant_Padding/10.3120.3122,255
        Buddy_TopHat2D_Constant_Padding/10.7830.783854
        Buddy_BottomHat2D_Constant_Padding/10.7850.785821
        OpenCV_Erode2D_Constant_Padding/10.1370.1375,119
        OpenCV_Opening2D_Constant_Padding/10.2210.2213,167
        OpenCV_Closing2D_Constant_Padding/10.2230.2233,139
        OpenCV_TopHat2D_Constant_Padding/10.2580.2582,712
        OpenCV_BottomHat2D_Constant_Padding/10.2560.2562,733
        OpenCV_MorphGrad2D_Constant_Padding/10.2490.2492,804
        OpenCV_Dilate2D_Constant_Padding/10.1370.1375,118
        +
        Console output +
        2025-06-01T10:00:56+00:00
        +Running ./bin/image-processing-benchmark
        +Run on (24 X 5100 MHz CPU s)
        +CPU Caches:
        +  L1 Data 48 KiB (x12)
        +  L1 Instruction 32 KiB (x12)
        +  L2 Unified 1280 KiB (x12)
        +  L3 Unified 30720 KiB (x1)
        +Load Average: 1.23, 1.63, 2.49
        +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        +--------------------------------------------------------------------------------------------
        +Benchmark                                                  Time             CPU   Iterations
        +--------------------------------------------------------------------------------------------
        +Eigen_Convolve2D/1                                      4.87 ms         4.87 ms          144
        +MLIR_Conv2D/1                                           7.19 ms         7.18 ms           97
        +Buddy_Conv2D/1                                         0.526 ms        0.526 ms         1322
        +Buddy_Corr2D_Constant_Padding/1                        0.793 ms        0.793 ms          885
        +OpenCV_Filter2D_Constant_Padding/1                      1.25 ms         1.25 ms          560
        +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4865
        +Buddy_Resize2D_Bilinear_Interpolation/1                0.260 ms        0.260 ms         2697
        +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       105155
        +OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49883
        +Buddy_Erosion2D_Constant_Padding/1                     0.214 ms        0.214 ms         3259
        +Buddy_Dilation2D_Constant_Padding/1                    0.213 ms        0.213 ms         3259
        +Buddy_Opening2D_Constant_Padding/1                     0.315 ms        0.315 ms         2258
        +Buddy_Closing2D_Constant_Padding/1                     0.312 ms        0.312 ms         2255
        +Buddy_TopHat2D_Constant_Padding/1                      0.783 ms        0.783 ms          854
        +Buddy_BottomHat2D_Constant_Padding/1                   0.785 ms        0.785 ms          821
        +OpenCV_Erode2D_Constant_Padding/1                      0.137 ms        0.137 ms         5119
        +OpenCV_Opening2D_Constant_Padding/1                    0.221 ms        0.221 ms         3167
        +OpenCV_Closing2D_Constant_Padding/1                    0.223 ms        0.223 ms         3139
        +OpenCV_TopHat2D_Constant_Padding/1                     0.258 ms        0.258 ms         2712
        +OpenCV_BottomHat2D_Constant_Padding/1                  0.256 ms        0.256 ms         2733
        +OpenCV_MorphGrad2D_Constant_Padding/1                  0.249 ms        0.249 ms         2804
        +OpenCV_Dilate2D_Constant_Padding/1                     0.137 ms        0.137 ms         5118
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +
        \ No newline at end of file diff --git a/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index b679d474..68d8c188 100644 --- a/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -7,30 +7,89 @@ tr:nth-child(even){background:#fafafa} details{border:1px solid #ccc;border-radius:.4rem;padding:.6rem} summary{font-weight:600;cursor:pointer} +.err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

        imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:41:02 UTC

        +

        imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-06-01 10:22:14 UTC

        SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        - - - - - - - - - - - - - - - - - - - - - - -
        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/111.811.859
        MLIR_Conv2D/129.929.924
        Buddy_Conv2D/13.13.1225
        Buddy_Corr2D_Constant_Padding/12.42.4299
        OpenCV_Filter2D_Constant_Padding/14.24.2167
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.10.14718
        Buddy_Resize2D_Bilinear_Interpolation/10.30.32624
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.00.0102278
        OpenCV_Resize2D_Bilinear_Interpolation/10.00.047123
        Buddy_Erosion2D_Constant_Padding/10.20.23147
        Buddy_Dilation2D_Constant_Padding/10.20.23121
        Buddy_Opening2D_Constant_Padding/10.30.32029
        Buddy_Closing2D_Constant_Padding/10.30.32088
        Buddy_TopHat2D_Constant_Padding/10.90.9755
        Buddy_BottomHat2D_Constant_Padding/10.90.9740
        OpenCV_Erode2D_Constant_Padding/10.10.15077
        OpenCV_Opening2D_Constant_Padding/10.20.23073
        OpenCV_Closing2D_Constant_Padding/10.20.23064
        OpenCV_TopHat2D_Constant_Padding/10.30.32696
        OpenCV_BottomHat2D_Constant_Padding/10.30.32699
        OpenCV_MorphGrad2D_Constant_Padding/10.30.32689
        OpenCV_Dilate2D_Constant_Padding/10.10.15007
        \ No newline at end of file + + + + + + + + + + + + + + + + + + + + + + +
        NameTime (ms)CPU (ms)Iterations
        Eigen_Convolve2D/111.511.561
        MLIR_Conv2D/128.928.924
        Buddy_Conv2D/13.033.03231
        Buddy_Corr2D_Constant_Padding/12.312.31303
        OpenCV_Filter2D_Constant_Padding/14.114.11171
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1420.1424,848
        Buddy_Resize2D_Bilinear_Interpolation/10.2610.2612,686
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006660.00666105,211
        OpenCV_Resize2D_Bilinear_Interpolation/10.0140.01449,990
        Buddy_Erosion2D_Constant_Padding/10.2140.2143,269
        Buddy_Dilation2D_Constant_Padding/10.2150.2153,239
        Buddy_Opening2D_Constant_Padding/10.3140.3142,276
        Buddy_Closing2D_Constant_Padding/10.3080.3082,257
        Buddy_TopHat2D_Constant_Padding/10.8040.804840
        Buddy_BottomHat2D_Constant_Padding/10.7830.783836
        OpenCV_Erode2D_Constant_Padding/10.1360.1365,131
        OpenCV_Opening2D_Constant_Padding/10.2240.2243,120
        OpenCV_Closing2D_Constant_Padding/10.2270.2273,078
        OpenCV_TopHat2D_Constant_Padding/10.260.262,688
        OpenCV_BottomHat2D_Constant_Padding/10.2590.2592,702
        OpenCV_MorphGrad2D_Constant_Padding/10.2530.2532,764
        OpenCV_Dilate2D_Constant_Padding/10.1390.1395,022
        +
        Console output +
        2025-06-01T10:01:20+00:00
        +Running ./bin/image-processing-benchmark
        +Run on (24 X 5100 MHz CPU s)
        +CPU Caches:
        +  L1 Data 48 KiB (x12)
        +  L1 Instruction 32 KiB (x12)
        +  L2 Unified 1280 KiB (x12)
        +  L3 Unified 30720 KiB (x1)
        +Load Average: 1.15, 1.58, 2.45
        +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        +--------------------------------------------------------------------------------------------
        +Benchmark                                                  Time             CPU   Iterations
        +--------------------------------------------------------------------------------------------
        +Eigen_Convolve2D/1                                      11.5 ms         11.5 ms           61
        +MLIR_Conv2D/1                                           28.9 ms         28.9 ms           24
        +Buddy_Conv2D/1                                          3.03 ms         3.03 ms          231
        +Buddy_Corr2D_Constant_Padding/1                         2.31 ms         2.31 ms          303
        +OpenCV_Filter2D_Constant_Padding/1                      4.11 ms         4.11 ms          171
        +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.142 ms        0.142 ms         4848
        +Buddy_Resize2D_Bilinear_Interpolation/1                0.261 ms        0.261 ms         2686
        +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       105211
        +OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49990
        +Buddy_Erosion2D_Constant_Padding/1                     0.214 ms        0.214 ms         3269
        +Buddy_Dilation2D_Constant_Padding/1                    0.215 ms        0.215 ms         3239
        +Buddy_Opening2D_Constant_Padding/1                     0.314 ms        0.314 ms         2276
        +Buddy_Closing2D_Constant_Padding/1                     0.308 ms        0.308 ms         2257
        +Buddy_TopHat2D_Constant_Padding/1                      0.804 ms        0.804 ms          840
        +Buddy_BottomHat2D_Constant_Padding/1                   0.783 ms        0.783 ms          836
        +OpenCV_Erode2D_Constant_Padding/1                      0.136 ms        0.136 ms         5131
        +OpenCV_Opening2D_Constant_Padding/1                    0.224 ms        0.224 ms         3120
        +OpenCV_Closing2D_Constant_Padding/1                    0.227 ms        0.227 ms         3078
        +OpenCV_TopHat2D_Constant_Padding/1                     0.260 ms        0.260 ms         2688
        +OpenCV_BottomHat2D_Constant_Padding/1                  0.259 ms        0.259 ms         2702
        +OpenCV_MorphGrad2D_Constant_Padding/1                  0.253 ms        0.253 ms         2764
        +OpenCV_Dilate2D_Constant_Padding/1                     0.139 ms        0.139 ms         5022
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +
        \ No newline at end of file diff --git a/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 5beb7557..c9393db2 100644 --- a/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -7,30 +7,89 @@ tr:nth-child(even){background:#fafafa} details{border:1px solid #ccc;border-radius:.4rem;padding:.6rem} summary{font-weight:600;cursor:pointer} +.err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

        imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-05-26 22:41:02 UTC

        +

        imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-06-01 10:22:14 UTC

        SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        - - - - - - - - - - - - - - - - - - - - - - -
        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/111.811.859
        MLIR_Conv2D/129.829.823
        Buddy_Conv2D/13.13.1225
        Buddy_Corr2D_Constant_Padding/12.42.4297
        OpenCV_Filter2D_Constant_Padding/14.24.2165
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.10.14749
        Buddy_Resize2D_Bilinear_Interpolation/10.30.32628
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.00.0102987
        OpenCV_Resize2D_Bilinear_Interpolation/10.00.048924
        Buddy_Erosion2D_Constant_Padding/10.20.23176
        Buddy_Dilation2D_Constant_Padding/10.20.23152
        Buddy_Opening2D_Constant_Padding/10.40.41998
        Buddy_Closing2D_Constant_Padding/10.30.31943
        Buddy_TopHat2D_Constant_Padding/10.90.9750
        Buddy_BottomHat2D_Constant_Padding/10.90.9737
        OpenCV_Erode2D_Constant_Padding/10.10.15030
        OpenCV_Opening2D_Constant_Padding/10.20.23017
        OpenCV_Closing2D_Constant_Padding/10.20.23170
        OpenCV_TopHat2D_Constant_Padding/10.30.32657
        OpenCV_BottomHat2D_Constant_Padding/10.30.32690
        OpenCV_MorphGrad2D_Constant_Padding/10.30.32671
        OpenCV_Dilate2D_Constant_Padding/10.10.14878
        \ No newline at end of file + + + + + + + + + + + + + + + + + + + + + + +
        NameTime (ms)CPU (ms)Iterations
        Eigen_Convolve2D/111.611.661
        MLIR_Conv2D/128.728.724
        Buddy_Conv2D/13.033.03231
        Buddy_Corr2D_Constant_Padding/12.312.31302
        OpenCV_Filter2D_Constant_Padding/14.14.1170
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1420.1424,835
        Buddy_Resize2D_Bilinear_Interpolation/10.260.262,693
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006680.00668104,962
        OpenCV_Resize2D_Bilinear_Interpolation/10.0140.01449,843
        Buddy_Erosion2D_Constant_Padding/10.2150.2153,259
        Buddy_Dilation2D_Constant_Padding/10.2140.2143,254
        Buddy_Opening2D_Constant_Padding/10.3070.3072,262
        Buddy_Closing2D_Constant_Padding/10.3190.3192,241
        Buddy_TopHat2D_Constant_Padding/10.7840.784851
        Buddy_BottomHat2D_Constant_Padding/10.7770.777840
        OpenCV_Erode2D_Constant_Padding/10.1350.1355,192
        OpenCV_Opening2D_Constant_Padding/10.2310.2313,030
        OpenCV_Closing2D_Constant_Padding/10.2290.2293,053
        OpenCV_TopHat2D_Constant_Padding/10.2680.2682,609
        OpenCV_BottomHat2D_Constant_Padding/10.2670.2672,624
        OpenCV_MorphGrad2D_Constant_Padding/10.2580.2582,714
        OpenCV_Dilate2D_Constant_Padding/10.1370.1375,119
        +
        Console output +
        2025-06-01T10:01:44+00:00
        +Running ./bin/image-processing-benchmark
        +Run on (24 X 5100 MHz CPU s)
        +CPU Caches:
        +  L1 Data 48 KiB (x12)
        +  L1 Instruction 32 KiB (x12)
        +  L2 Unified 1280 KiB (x12)
        +  L3 Unified 30720 KiB (x1)
        +Load Average: 1.10, 1.53, 2.41
        +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        +--------------------------------------------------------------------------------------------
        +Benchmark                                                  Time             CPU   Iterations
        +--------------------------------------------------------------------------------------------
        +Eigen_Convolve2D/1                                      11.6 ms         11.6 ms           61
        +MLIR_Conv2D/1                                           28.7 ms         28.7 ms           24
        +Buddy_Conv2D/1                                          3.03 ms         3.03 ms          231
        +Buddy_Corr2D_Constant_Padding/1                         2.31 ms         2.31 ms          302
        +OpenCV_Filter2D_Constant_Padding/1                      4.10 ms         4.10 ms          170
        +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.142 ms        0.142 ms         4835
        +Buddy_Resize2D_Bilinear_Interpolation/1                0.260 ms        0.260 ms         2693
        +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       104962
        +OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49843
        +Buddy_Erosion2D_Constant_Padding/1                     0.215 ms        0.215 ms         3259
        +Buddy_Dilation2D_Constant_Padding/1                    0.214 ms        0.214 ms         3254
        +Buddy_Opening2D_Constant_Padding/1                     0.307 ms        0.307 ms         2262
        +Buddy_Closing2D_Constant_Padding/1                     0.319 ms        0.319 ms         2241
        +Buddy_TopHat2D_Constant_Padding/1                      0.784 ms        0.784 ms          851
        +Buddy_BottomHat2D_Constant_Padding/1                   0.777 ms        0.777 ms          840
        +OpenCV_Erode2D_Constant_Padding/1                      0.135 ms        0.135 ms         5192
        +OpenCV_Opening2D_Constant_Padding/1                    0.231 ms        0.231 ms         3030
        +OpenCV_Closing2D_Constant_Padding/1                    0.229 ms        0.229 ms         3053
        +OpenCV_TopHat2D_Constant_Padding/1                     0.268 ms        0.268 ms         2609
        +OpenCV_BottomHat2D_Constant_Padding/1                  0.267 ms        0.267 ms         2624
        +OpenCV_MorphGrad2D_Constant_Padding/1                  0.258 ms        0.258 ms         2714
        +OpenCV_Dilate2D_Constant_Padding/1                     0.137 ms        0.137 ms         5119
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +
        \ No newline at end of file diff --git a/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index c83fa86f..3cb192cb 100644 --- a/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -7,30 +7,89 @@ tr:nth-child(even){background:#fafafa} details{border:1px solid #ccc;border-radius:.4rem;padding:.6rem} summary{font-weight:600;cursor:pointer} +.err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

        imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:41:02 UTC

        +

        imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-06-01 10:22:14 UTC

        SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        - - - - - - - - - - - - - - - - - - - - - - -
        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/122.022.032
        MLIR_Conv2D/168.168.110
        Buddy_Conv2D/16.36.3109
        Buddy_Corr2D_Constant_Padding/14.74.7148
        OpenCV_Filter2D_Constant_Padding/18.88.879
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.10.14771
        Buddy_Resize2D_Bilinear_Interpolation/10.30.32626
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.00.0103087
        OpenCV_Resize2D_Bilinear_Interpolation/10.00.048675
        Buddy_Erosion2D_Constant_Padding/10.20.23188
        Buddy_Dilation2D_Constant_Padding/10.20.23182
        Buddy_Opening2D_Constant_Padding/10.30.32146
        Buddy_Closing2D_Constant_Padding/10.40.42142
        Buddy_TopHat2D_Constant_Padding/10.90.9745
        Buddy_BottomHat2D_Constant_Padding/10.90.9722
        OpenCV_Erode2D_Constant_Padding/10.10.15046
        OpenCV_Opening2D_Constant_Padding/10.20.23129
        OpenCV_Closing2D_Constant_Padding/10.20.23078
        OpenCV_TopHat2D_Constant_Padding/10.30.32660
        OpenCV_BottomHat2D_Constant_Padding/10.30.32723
        OpenCV_MorphGrad2D_Constant_Padding/10.30.32685
        OpenCV_Dilate2D_Constant_Padding/10.10.14978
        \ No newline at end of file + + + + + + + + + + + + + + + + + + + + + + +
        NameTime (ms)CPU (ms)Iterations
        Eigen_Convolve2D/121.621.631
        MLIR_Conv2D/166.766.711
        Buddy_Conv2D/16.126.12114
        Buddy_Corr2D_Constant_Padding/14.654.65151
        OpenCV_Filter2D_Constant_Padding/18.68.681
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,847
        Buddy_Resize2D_Bilinear_Interpolation/10.2610.2612,686
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006660.00666105,138
        OpenCV_Resize2D_Bilinear_Interpolation/10.0140.01449,889
        Buddy_Erosion2D_Constant_Padding/10.2130.2133,257
        Buddy_Dilation2D_Constant_Padding/10.2130.2133,242
        Buddy_Opening2D_Constant_Padding/10.3130.3132,237
        Buddy_Closing2D_Constant_Padding/10.3180.3182,232
        Buddy_TopHat2D_Constant_Padding/10.7750.775853
        Buddy_BottomHat2D_Constant_Padding/10.7880.788846
        OpenCV_Erode2D_Constant_Padding/10.1380.1385,075
        OpenCV_Opening2D_Constant_Padding/10.2320.2323,015
        OpenCV_Closing2D_Constant_Padding/10.2250.2253,114
        OpenCV_TopHat2D_Constant_Padding/10.2640.2642,647
        OpenCV_BottomHat2D_Constant_Padding/10.2620.2622,672
        OpenCV_MorphGrad2D_Constant_Padding/10.2550.2552,749
        OpenCV_Dilate2D_Constant_Padding/10.1370.1375,111
        +
        Console output +
        2025-06-01T10:02:08+00:00
        +Running ./bin/image-processing-benchmark
        +Run on (24 X 5100 MHz CPU s)
        +CPU Caches:
        +  L1 Data 48 KiB (x12)
        +  L1 Instruction 32 KiB (x12)
        +  L2 Unified 1280 KiB (x12)
        +  L3 Unified 30720 KiB (x1)
        +Load Average: 1.06, 1.49, 2.37
        +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        +--------------------------------------------------------------------------------------------
        +Benchmark                                                  Time             CPU   Iterations
        +--------------------------------------------------------------------------------------------
        +Eigen_Convolve2D/1                                      21.6 ms         21.6 ms           31
        +MLIR_Conv2D/1                                           66.7 ms         66.7 ms           11
        +Buddy_Conv2D/1                                          6.12 ms         6.12 ms          114
        +Buddy_Corr2D_Constant_Padding/1                         4.65 ms         4.65 ms          151
        +OpenCV_Filter2D_Constant_Padding/1                      8.60 ms         8.60 ms           81
        +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4847
        +Buddy_Resize2D_Bilinear_Interpolation/1                0.261 ms        0.261 ms         2686
        +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       105138
        +OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49889
        +Buddy_Erosion2D_Constant_Padding/1                     0.213 ms        0.213 ms         3257
        +Buddy_Dilation2D_Constant_Padding/1                    0.213 ms        0.213 ms         3242
        +Buddy_Opening2D_Constant_Padding/1                     0.313 ms        0.313 ms         2237
        +Buddy_Closing2D_Constant_Padding/1                     0.318 ms        0.318 ms         2232
        +Buddy_TopHat2D_Constant_Padding/1                      0.775 ms        0.775 ms          853
        +Buddy_BottomHat2D_Constant_Padding/1                   0.788 ms        0.788 ms          846
        +OpenCV_Erode2D_Constant_Padding/1                      0.138 ms        0.138 ms         5075
        +OpenCV_Opening2D_Constant_Padding/1                    0.232 ms        0.232 ms         3015
        +OpenCV_Closing2D_Constant_Padding/1                    0.225 ms        0.225 ms         3114
        +OpenCV_TopHat2D_Constant_Padding/1                     0.264 ms        0.264 ms         2647
        +OpenCV_BottomHat2D_Constant_Padding/1                  0.262 ms        0.262 ms         2672
        +OpenCV_MorphGrad2D_Constant_Padding/1                  0.255 ms        0.255 ms         2749
        +OpenCV_Dilate2D_Constant_Padding/1                     0.137 ms        0.137 ms         5111
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +
        \ No newline at end of file diff --git a/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index b830bd5a..a88fc411 100644 --- a/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -7,30 +7,89 @@ tr:nth-child(even){background:#fafafa} details{border:1px solid #ccc;border-radius:.4rem;padding:.6rem} summary{font-weight:600;cursor:pointer} +.err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

        imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-05-26 22:41:02 UTC

        +

        imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-06-01 10:22:14 UTC

        SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        - - - - - - - - - - - - - - - - - - - - - - -
        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/122.122.132
        MLIR_Conv2D/168.268.210
        Buddy_Conv2D/16.46.4110
        Buddy_Corr2D_Constant_Padding/14.74.7149
        OpenCV_Filter2D_Constant_Padding/18.88.880
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.10.14781
        Buddy_Resize2D_Bilinear_Interpolation/10.30.32627
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.00.0102862
        OpenCV_Resize2D_Bilinear_Interpolation/10.00.048877
        Buddy_Erosion2D_Constant_Padding/10.20.23093
        Buddy_Dilation2D_Constant_Padding/10.20.23107
        Buddy_Opening2D_Constant_Padding/10.30.32034
        Buddy_Closing2D_Constant_Padding/10.30.31989
        Buddy_TopHat2D_Constant_Padding/10.90.9754
        Buddy_BottomHat2D_Constant_Padding/10.90.9736
        OpenCV_Erode2D_Constant_Padding/10.10.15028
        OpenCV_Opening2D_Constant_Padding/10.20.23031
        OpenCV_Closing2D_Constant_Padding/10.20.23131
        OpenCV_TopHat2D_Constant_Padding/10.30.32697
        OpenCV_BottomHat2D_Constant_Padding/10.30.32706
        OpenCV_MorphGrad2D_Constant_Padding/10.30.32674
        OpenCV_Dilate2D_Constant_Padding/10.20.24993
        \ No newline at end of file + + + + + + + + + + + + + + + + + + + + + + +
        NameTime (ms)CPU (ms)Iterations
        Eigen_Convolve2D/121.521.532
        MLIR_Conv2D/166.666.611
        Buddy_Conv2D/16.136.13114
        Buddy_Corr2D_Constant_Padding/14.654.65151
        OpenCV_Filter2D_Constant_Padding/18.68.681
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,857
        Buddy_Resize2D_Bilinear_Interpolation/10.260.262,693
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006650.00665105,362
        OpenCV_Resize2D_Bilinear_Interpolation/10.0140.01449,959
        Buddy_Erosion2D_Constant_Padding/10.2140.2143,220
        Buddy_Dilation2D_Constant_Padding/10.2150.2153,263
        Buddy_Opening2D_Constant_Padding/10.310.312,246
        Buddy_Closing2D_Constant_Padding/10.310.312,217
        Buddy_TopHat2D_Constant_Padding/10.7780.778828
        Buddy_BottomHat2D_Constant_Padding/10.7930.793833
        OpenCV_Erode2D_Constant_Padding/10.1360.1365,129
        OpenCV_Opening2D_Constant_Padding/10.2260.2263,091
        OpenCV_Closing2D_Constant_Padding/10.2260.2263,096
        OpenCV_TopHat2D_Constant_Padding/10.2610.2612,677
        OpenCV_BottomHat2D_Constant_Padding/10.2610.2612,684
        OpenCV_MorphGrad2D_Constant_Padding/10.2530.2532,763
        OpenCV_Dilate2D_Constant_Padding/10.1360.1365,132
        +
        Console output +
        2025-06-01T10:02:32+00:00
        +Running ./bin/image-processing-benchmark
        +Run on (24 X 5100 MHz CPU s)
        +CPU Caches:
        +  L1 Data 48 KiB (x12)
        +  L1 Instruction 32 KiB (x12)
        +  L2 Unified 1280 KiB (x12)
        +  L3 Unified 30720 KiB (x1)
        +Load Average: 1.04, 1.45, 2.33
        +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        +--------------------------------------------------------------------------------------------
        +Benchmark                                                  Time             CPU   Iterations
        +--------------------------------------------------------------------------------------------
        +Eigen_Convolve2D/1                                      21.5 ms         21.5 ms           32
        +MLIR_Conv2D/1                                           66.6 ms         66.6 ms           11
        +Buddy_Conv2D/1                                          6.13 ms         6.13 ms          114
        +Buddy_Corr2D_Constant_Padding/1                         4.65 ms         4.65 ms          151
        +OpenCV_Filter2D_Constant_Padding/1                      8.60 ms         8.60 ms           81
        +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4857
        +Buddy_Resize2D_Bilinear_Interpolation/1                0.260 ms        0.260 ms         2693
        +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       105362
        +OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49959
        +Buddy_Erosion2D_Constant_Padding/1                     0.214 ms        0.214 ms         3220
        +Buddy_Dilation2D_Constant_Padding/1                    0.215 ms        0.215 ms         3263
        +Buddy_Opening2D_Constant_Padding/1                     0.310 ms        0.310 ms         2246
        +Buddy_Closing2D_Constant_Padding/1                     0.310 ms        0.310 ms         2217
        +Buddy_TopHat2D_Constant_Padding/1                      0.778 ms        0.778 ms          828
        +Buddy_BottomHat2D_Constant_Padding/1                   0.793 ms        0.793 ms          833
        +OpenCV_Erode2D_Constant_Padding/1                      0.136 ms        0.136 ms         5129
        +OpenCV_Opening2D_Constant_Padding/1                    0.226 ms        0.226 ms         3091
        +OpenCV_Closing2D_Constant_Padding/1                    0.226 ms        0.226 ms         3096
        +OpenCV_TopHat2D_Constant_Padding/1                     0.261 ms        0.261 ms         2677
        +OpenCV_BottomHat2D_Constant_Padding/1                  0.261 ms        0.261 ms         2684
        +OpenCV_MorphGrad2D_Constant_Padding/1                  0.253 ms        0.253 ms         2763
        +OpenCV_Dilate2D_Constant_Padding/1                     0.136 ms        0.136 ms         5132
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +
        \ No newline at end of file diff --git a/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 932f4a65..e0e22ffa 100644 --- a/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -7,30 +7,89 @@ tr:nth-child(even){background:#fafafa} details{border:1px solid #ccc;border-radius:.4rem;padding:.6rem} summary{font-weight:600;cursor:pointer} +.err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

        imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-05-26 22:41:02 UTC

        +

        imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        2025-06-01 10:22:14 UTC

        SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

        - - - - - - - - - - - - - - - - - - - - - - -
        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/135.235.220
        MLIR_Conv2D/1121.2121.26
        Buddy_Conv2D/110.810.864
        Buddy_Corr2D_Constant_Padding/17.97.989
        OpenCV_Filter2D_Constant_Padding/15.95.9118
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.10.14772
        Buddy_Resize2D_Bilinear_Interpolation/10.30.32626
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.00.0102812
        OpenCV_Resize2D_Bilinear_Interpolation/10.00.048301
        Buddy_Erosion2D_Constant_Padding/10.20.23183
        Buddy_Dilation2D_Constant_Padding/10.20.23163
        Buddy_Opening2D_Constant_Padding/10.30.32017
        Buddy_Closing2D_Constant_Padding/10.40.42024
        Buddy_TopHat2D_Constant_Padding/10.90.9749
        Buddy_BottomHat2D_Constant_Padding/10.90.9730
        OpenCV_Erode2D_Constant_Padding/10.10.15038
        OpenCV_Opening2D_Constant_Padding/10.20.23212
        OpenCV_Closing2D_Constant_Padding/10.20.23171
        OpenCV_TopHat2D_Constant_Padding/10.30.32701
        OpenCV_BottomHat2D_Constant_Padding/10.30.32755
        OpenCV_MorphGrad2D_Constant_Padding/10.30.32727
        OpenCV_Dilate2D_Constant_Padding/10.10.15053
        \ No newline at end of file + + + + + + + + + + + + + + + + + + + + + + +
        NameTime (ms)CPU (ms)Iterations
        Eigen_Convolve2D/141.541.517
        MLIR_Conv2D/11441445
        Buddy_Conv2D/110.510.567
        Buddy_Corr2D_Constant_Padding/17.957.9590
        OpenCV_Filter2D_Constant_Padding/15.895.89120
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,856
        Buddy_Resize2D_Bilinear_Interpolation/10.2610.2612,692
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006660.00666105,165
        OpenCV_Resize2D_Bilinear_Interpolation/10.01420.014249,405
        Buddy_Erosion2D_Constant_Padding/10.2140.2143,218
        Buddy_Dilation2D_Constant_Padding/10.2150.2153,226
        Buddy_Opening2D_Constant_Padding/10.3120.3122,175
        Buddy_Closing2D_Constant_Padding/10.3120.3122,264
        Buddy_TopHat2D_Constant_Padding/10.8210.821843
        Buddy_BottomHat2D_Constant_Padding/10.8180.818844
        OpenCV_Erode2D_Constant_Padding/10.1360.1365,142
        OpenCV_Opening2D_Constant_Padding/10.2210.2213,174
        OpenCV_Closing2D_Constant_Padding/10.2210.2213,164
        OpenCV_TopHat2D_Constant_Padding/10.2560.2562,735
        OpenCV_BottomHat2D_Constant_Padding/10.2580.2582,710
        OpenCV_MorphGrad2D_Constant_Padding/10.2510.2512,787
        OpenCV_Dilate2D_Constant_Padding/10.1370.1375,117
        +
        Console output +
        2025-06-01T10:02:56+00:00
        +Running ./bin/image-processing-benchmark
        +Run on (24 X 5100 MHz CPU s)
        +CPU Caches:
        +  L1 Data 48 KiB (x12)
        +  L1 Instruction 32 KiB (x12)
        +  L2 Unified 1280 KiB (x12)
        +  L3 Unified 30720 KiB (x1)
        +Load Average: 1.03, 1.42, 2.30
        +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        +--------------------------------------------------------------------------------------------
        +Benchmark                                                  Time             CPU   Iterations
        +--------------------------------------------------------------------------------------------
        +Eigen_Convolve2D/1                                      41.5 ms         41.5 ms           17
        +MLIR_Conv2D/1                                            144 ms          144 ms            5
        +Buddy_Conv2D/1                                          10.5 ms         10.5 ms           67
        +Buddy_Corr2D_Constant_Padding/1                         7.95 ms         7.95 ms           90
        +OpenCV_Filter2D_Constant_Padding/1                      5.89 ms         5.89 ms          120
        +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4856
        +Buddy_Resize2D_Bilinear_Interpolation/1                0.261 ms        0.261 ms         2692
        +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       105165
        +OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49405
        +Buddy_Erosion2D_Constant_Padding/1                     0.214 ms        0.214 ms         3218
        +Buddy_Dilation2D_Constant_Padding/1                    0.215 ms        0.215 ms         3226
        +Buddy_Opening2D_Constant_Padding/1                     0.312 ms        0.312 ms         2175
        +Buddy_Closing2D_Constant_Padding/1                     0.312 ms        0.312 ms         2264
        +Buddy_TopHat2D_Constant_Padding/1                      0.821 ms        0.821 ms          843
        +Buddy_BottomHat2D_Constant_Padding/1                   0.818 ms        0.818 ms          844
        +OpenCV_Erode2D_Constant_Padding/1                      0.136 ms        0.136 ms         5142
        +OpenCV_Opening2D_Constant_Padding/1                    0.221 ms        0.221 ms         3174
        +OpenCV_Closing2D_Constant_Padding/1                    0.221 ms        0.221 ms         3164
        +OpenCV_TopHat2D_Constant_Padding/1                     0.256 ms        0.256 ms         2735
        +OpenCV_BottomHat2D_Constant_Padding/1                  0.258 ms        0.258 ms         2710
        +OpenCV_MorphGrad2D_Constant_Padding/1                  0.251 ms        0.251 ms         2787
        +OpenCV_Dilate2D_Constant_Padding/1                     0.137 ms        0.137 ms         5117
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +
        \ No newline at end of file diff --git a/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 472a8f61..5d73a832 100644 --- a/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -7,30 +7,89 @@ tr:nth-child(even){background:#fafafa} details{border:1px solid #ccc;border-radius:.4rem;padding:.6rem} summary{font-weight:600;cursor:pointer} +.err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

        imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-05-26 22:41:02 UTC

        +

        imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        2025-06-01 10:22:14 UTC

        SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

        - - - - - - - - - - - - - - - - - - - - - - -
        NameTime (ns)CPU (ns)Iterations
        Eigen_Convolve2D/135.335.320
        MLIR_Conv2D/1121.0121.06
        Buddy_Conv2D/110.910.964
        Buddy_Corr2D_Constant_Padding/17.97.988
        OpenCV_Filter2D_Constant_Padding/15.95.9118
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.10.14703
        Buddy_Resize2D_Bilinear_Interpolation/10.30.32264
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.00.0103210
        OpenCV_Resize2D_Bilinear_Interpolation/10.00.048545
        Buddy_Erosion2D_Constant_Padding/10.20.23132
        Buddy_Dilation2D_Constant_Padding/10.20.23160
        Buddy_Opening2D_Constant_Padding/10.30.32027
        Buddy_Closing2D_Constant_Padding/10.40.42017
        Buddy_TopHat2D_Constant_Padding/10.90.9750
        Buddy_BottomHat2D_Constant_Padding/10.90.9761
        OpenCV_Erode2D_Constant_Padding/10.10.15027
        OpenCV_Opening2D_Constant_Padding/10.20.23038
        OpenCV_Closing2D_Constant_Padding/10.20.23072
        OpenCV_TopHat2D_Constant_Padding/10.30.32693
        OpenCV_BottomHat2D_Constant_Padding/10.30.32663
        OpenCV_MorphGrad2D_Constant_Padding/10.30.32707
        OpenCV_Dilate2D_Constant_Padding/10.10.15003
        \ No newline at end of file + + + + + + + + + + + + + + + + + + + + + + +
        NameTime (ms)CPU (ms)Iterations
        Eigen_Convolve2D/134.234.220
        MLIR_Conv2D/11191196
        Buddy_Conv2D/110.510.567
        Buddy_Corr2D_Constant_Padding/17.897.8990
        OpenCV_Filter2D_Constant_Padding/15.895.89119
        Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,857
        Buddy_Resize2D_Bilinear_Interpolation/10.260.262,690
        OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006650.00665105,068
        OpenCV_Resize2D_Bilinear_Interpolation/10.01420.014249,449
        Buddy_Erosion2D_Constant_Padding/10.2140.2143,244
        Buddy_Dilation2D_Constant_Padding/10.2250.2253,243
        Buddy_Opening2D_Constant_Padding/10.3070.3072,260
        Buddy_Closing2D_Constant_Padding/10.3130.3132,223
        Buddy_TopHat2D_Constant_Padding/10.8180.818827
        Buddy_BottomHat2D_Constant_Padding/10.7970.796861
        OpenCV_Erode2D_Constant_Padding/10.1370.1375,101
        OpenCV_Opening2D_Constant_Padding/10.2190.2193,187
        OpenCV_Closing2D_Constant_Padding/10.2220.2223,142
        OpenCV_TopHat2D_Constant_Padding/10.2560.2562,731
        OpenCV_BottomHat2D_Constant_Padding/10.2550.2552,740
        OpenCV_MorphGrad2D_Constant_Padding/10.2490.2492,815
        OpenCV_Dilate2D_Constant_Padding/10.1350.1355,206
        +
        Console output +
        2025-06-01T10:03:20+00:00
        +Running ./bin/image-processing-benchmark
        +Run on (24 X 5100 MHz CPU s)
        +CPU Caches:
        +  L1 Data 48 KiB (x12)
        +  L1 Instruction 32 KiB (x12)
        +  L2 Unified 1280 KiB (x12)
        +  L3 Unified 30720 KiB (x1)
        +Load Average: 1.02, 1.38, 2.27
        +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
        +--------------------------------------------------------------------------------------------
        +Benchmark                                                  Time             CPU   Iterations
        +--------------------------------------------------------------------------------------------
        +Eigen_Convolve2D/1                                      34.2 ms         34.2 ms           20
        +MLIR_Conv2D/1                                            119 ms          119 ms            6
        +Buddy_Conv2D/1                                          10.5 ms         10.5 ms           67
        +Buddy_Corr2D_Constant_Padding/1                         7.89 ms         7.89 ms           90
        +OpenCV_Filter2D_Constant_Padding/1                      5.89 ms         5.89 ms          119
        +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4857
        +Buddy_Resize2D_Bilinear_Interpolation/1                0.260 ms        0.260 ms         2690
        +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       105068
        +OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49449
        +Buddy_Erosion2D_Constant_Padding/1                     0.214 ms        0.214 ms         3244
        +Buddy_Dilation2D_Constant_Padding/1                    0.225 ms        0.225 ms         3243
        +Buddy_Opening2D_Constant_Padding/1                     0.307 ms        0.307 ms         2260
        +Buddy_Closing2D_Constant_Padding/1                     0.313 ms        0.313 ms         2223
        +Buddy_TopHat2D_Constant_Padding/1                      0.818 ms        0.818 ms          827
        +Buddy_BottomHat2D_Constant_Padding/1                   0.797 ms        0.796 ms          861
        +OpenCV_Erode2D_Constant_Padding/1                      0.137 ms        0.137 ms         5101
        +OpenCV_Opening2D_Constant_Padding/1                    0.219 ms        0.219 ms         3187
        +OpenCV_Closing2D_Constant_Padding/1                    0.222 ms        0.222 ms         3142
        +OpenCV_TopHat2D_Constant_Padding/1                     0.256 ms        0.256 ms         2731
        +OpenCV_BottomHat2D_Constant_Padding/1                  0.255 ms        0.255 ms         2740
        +OpenCV_MorphGrad2D_Constant_Padding/1                  0.249 ms        0.249 ms         2815
        +OpenCV_Dilate2D_Constant_Padding/1                     0.135 ms        0.135 ms         5206
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +Saved PNG file.
        +
        \ No newline at end of file diff --git a/site/index.html b/site/index.html index e39390f8..49cd152f 100644 --- a/site/index.html +++ b/site/index.html @@ -7,6 +7,7 @@ tr:nth-child(even){background:#fafafa} details{border:1px solid #ccc;border-radius:.4rem;padding:.6rem} summary{font-weight:600;cursor:pointer} +.err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem}

        Buddy-Benchmark results

        • deeplearning/dl-layer-ffn-benchmark.html
        • @@ -32,10 +33,13 @@

          Buddy-Benchmark results

          • deeplearning/dl-op-linalg-mathrsqrt-benchmark.html
          • deeplearning/dl-op-linalg-matmul-benchmark.html
          • deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html
          • +
          • deeplearning/dl-op-linalg-reduceaddf-benchmark.html
          • +
          • deeplearning/dl-op-linalg-reducemaxf-benchmark.html
          • deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html
          • deeplearning/dl-op-matmul-transpose-b-benchmark.html
          • deeplearning/dl-op-tosa-transpose-benchmark.html
          • imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html
          • +
          • imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html
          • imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html
          • imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html
          • imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html
          • diff --git a/site/vectorization/vectorization_matrix.html b/site/vectorization/vectorization_matrix.html index 04d171b8..d875b3c4 100644 --- a/site/vectorization/vectorization_matrix.html +++ b/site/vectorization/vectorization_matrix.html @@ -7,10 +7,34 @@ tr:nth-child(even){background:#fafafa} details{border:1px solid #ccc;border-radius:.4rem;padding:.6rem} summary{font-weight:600;cursor:pointer} +.err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            vectorization/vectorization_matrix.json

            2025-05-26 22:41:02 UTC

            +

            vectorization/vectorization_matrix.json

            2025-06-01 10:22:14 UTC

            vectorization_matrix.json

            - - -
            NameTime (ns)CPU (ns)Iterations
            MLIR_MatMul/119.519.535685429
            MLIR_MatVec/121.421.433101404
            \ No newline at end of file + + +
            NameTime (ns)CPU (ns)Iterations
            MLIR_MatMul/118.818.837,302,822
            MLIR_MatVec/120.520.535,030,976
            +
            Console output +
            2025-06-01T10:11:11+00:00
            +Running ./vectorization-matrix-benchmark
            +Run on (24 X 5100 MHz CPU s)
            +CPU Caches:
            +  L1 Data 48 KiB (x12)
            +  L1 Instruction 32 KiB (x12)
            +  L2 Unified 1280 KiB (x12)
            +  L3 Unified 30720 KiB (x1)
            +Load Average: 1.00, 1.09, 1.76
            +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            +--------------------------------------------------------
            +Benchmark              Time             CPU   Iterations
            +--------------------------------------------------------
            +MLIR_MatMul/1       18.8 ns         18.8 ns     37302822
            +MLIR_MatVec/1       20.5 ns         20.5 ns     35030976
            +--------------------------------------------------------
            +MLIR_MatMul: MLIR MatMul Operation + Nested Loop
            +[ 18 18 18 18 18 18 18 18 18 18 ]
            +--------------------------------------------------------
            +MLIR_MatVec: MLIR MatVec Operation
            +[ 18 18 18 18 18 18 18 18 18 18 ]
            +
            \ No newline at end of file diff --git a/test_result/deeplearning/build_results_crosscompile_summary.log b/test_result/deeplearning/build_results_crosscompile_summary.log old mode 100755 new mode 100644 diff --git a/test_result/deeplearning/build_results_summary.log b/test_result/deeplearning/build_results_summary.log old mode 100755 new mode 100644 diff --git a/test_result/deeplearning/dl-layer-ffn-benchmark.json b/test_result/deeplearning/dl-layer-ffn-benchmark.json old mode 100755 new mode 100644 index 4c94760e..f2efeaac --- a/test_result/deeplearning/dl-layer-ffn-benchmark.json +++ b/test_result/deeplearning/dl-layer-ffn-benchmark.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-05-26T21:12:34+00:00", + "date": "2025-06-01T09:43:17+00:00", "host_name": "4ed4bacfe45d", "executable": "./dl-layer-ffn-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.00439,1.19629,1.95947], + "load_avg": [1.02002,1.92676,3.979], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 10758, - "real_time": 6.5206875738922074e-02, - "cpu_time": 6.5204998977505119e-02, + "iterations": 10788, + "real_time": 6.5253924752468523e-02, + "cpu_time": 6.5252047089358556e-02, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 25878, - "real_time": 2.6888433440486709e-02, - "cpu_time": 2.6887743681891955e-02, + "iterations": 25830, + "real_time": 2.6970787858644568e-02, + "cpu_time": 2.6970320867208675e-02, "time_unit": "ms" } ] diff --git a/test_result/deeplearning/dl-layer-ffn-benchmark.log b/test_result/deeplearning/dl-layer-ffn-benchmark.log old mode 100755 new mode 100644 index 476f00a9..4a30e9a6 --- a/test_result/deeplearning/dl-layer-ffn-benchmark.log +++ b/test_result/deeplearning/dl-layer-ffn-benchmark.log @@ -1,4 +1,4 @@ -2025-05-26T21:12:34+00:00 +2025-06-01T09:43:17+00:00 Running 
./dl-layer-ffn-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.20, 1.96 +Load Average: 1.02, 1.93, 3.98 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. -------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------- -DL_LAYER_FFN/Scalar 0.065 ms 0.065 ms 10758 -DL_LAYER_FFN/Auto_Vectorization 0.027 ms 0.027 ms 25878 +DL_LAYER_FFN/Scalar 0.065 ms 0.065 ms 10788 +DL_LAYER_FFN/Auto_Vectorization 0.027 ms 0.027 ms 25830 ----------------------------------------------------------- Correctness Verification: PASS ----------------------------------------------------------- diff --git a/test_result/deeplearning/dl-layer-rmsnorm-benchmark.json b/test_result/deeplearning/dl-layer-rmsnorm-benchmark.json old mode 100755 new mode 100644 index ecdebb4c..d91335de --- a/test_result/deeplearning/dl-layer-rmsnorm-benchmark.json +++ b/test_result/deeplearning/dl-layer-rmsnorm-benchmark.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-05-26T21:12:38+00:00", + "date": "2025-06-01T09:43:21+00:00", "host_name": "4ed4bacfe45d", "executable": "./dl-layer-rmsnorm-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.00391,1.19287,1.9541], + "load_avg": [1.02002,1.92676,3.979], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 358748, - "real_time": 1.9752554668907084e-03, - "cpu_time": 1.9749622827165587e-03, + "iterations": 355522, + "real_time": 1.9578834433703969e-03, + "cpu_time": 1.9577640146038783e-03, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 753724, - "real_time": 
9.0683698707158544e-04, - "cpu_time": 9.0681028997351815e-04, + "iterations": 763038, + "real_time": 9.0721212688664129e-04, + "cpu_time": 9.0718018892899164e-04, "time_unit": "ms" } ] diff --git a/test_result/deeplearning/dl-layer-rmsnorm-benchmark.log b/test_result/deeplearning/dl-layer-rmsnorm-benchmark.log old mode 100755 new mode 100644 index fae3378b..66a6a4da --- a/test_result/deeplearning/dl-layer-rmsnorm-benchmark.log +++ b/test_result/deeplearning/dl-layer-rmsnorm-benchmark.log @@ -1,4 +1,4 @@ -2025-05-26T21:12:38+00:00 +2025-06-01T09:43:21+00:00 Running ./dl-layer-rmsnorm-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.19, 1.95 +Load Average: 1.02, 1.93, 3.98 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. ------------------------------------------------------------------------------ Benchmark Time CPU Iterations ------------------------------------------------------------------------------ -DL_LAYER_RMSNORM/Scalar 0.002 ms 0.002 ms 358748 -DL_LAYER_RMSNORM/Auto_Vectorization 0.001 ms 0.001 ms 753724 +DL_LAYER_RMSNORM/Scalar 0.002 ms 0.002 ms 355522 +DL_LAYER_RMSNORM/Auto_Vectorization 0.001 ms 0.001 ms 763038 ----------------------------------------------------------- Correctness Verification: PASS ----------------------------------------------------------- diff --git a/test_result/deeplearning/dl-layer-selfattention-benchmark.json b/test_result/deeplearning/dl-layer-selfattention-benchmark.json old mode 100755 new mode 100644 index 1b3b5134..f2451cc0 --- a/test_result/deeplearning/dl-layer-selfattention-benchmark.json +++ b/test_result/deeplearning/dl-layer-selfattention-benchmark.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-05-26T21:12:36+00:00", + "date": "2025-06-01T09:43:19+00:00", "host_name": "4ed4bacfe45d", "executable": 
"./dl-layer-selfattention-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.00391,1.19287,1.9541], + "load_avg": [1.02002,1.92676,3.979], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 148, - "real_time": 4.7080184056146726e+00, - "cpu_time": 4.7073387770270267e+00, + "iterations": 149, + "real_time": 4.6853602007531476e+00, + "cpu_time": 4.6852666510067111e+00, "time_unit": "ms" }, { @@ -60,8 +60,8 @@ "repetition_index": 0, "threads": 1, "iterations": 447, - "real_time": 1.5711927248927571e+00, - "cpu_time": 1.5709791208053694e+00, + "real_time": 1.5682149470119136e+00, + "cpu_time": 1.5680948903803129e+00, "time_unit": "ms" } ] diff --git a/test_result/deeplearning/dl-layer-selfattention-benchmark.log b/test_result/deeplearning/dl-layer-selfattention-benchmark.log old mode 100755 new mode 100644 index 7af6fdca..f4e9fd5c --- a/test_result/deeplearning/dl-layer-selfattention-benchmark.log +++ b/test_result/deeplearning/dl-layer-selfattention-benchmark.log @@ -1,4 +1,4 @@ -2025-05-26T21:12:36+00:00 +2025-06-01T09:43:19+00:00 Running ./dl-layer-selfattention-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,12 +6,12 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.19, 1.95 +Load Average: 1.02, 1.93, 3.98 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
-------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------- -DL_LAYER_ATTENTION/Scalar 4.71 ms 4.71 ms 148 +DL_LAYER_ATTENTION/Scalar 4.69 ms 4.69 ms 149 DL_LAYER_ATTENTION/Auto_Vectorization 1.57 ms 1.57 ms 447 ----------------------------------------------------------- Correctness Verification: PASS diff --git a/test_result/deeplearning/dl-model-lenet-benchmark.json b/test_result/deeplearning/dl-model-lenet-benchmark.json old mode 100755 new mode 100644 index 3de6403d..12256cab --- a/test_result/deeplearning/dl-model-lenet-benchmark.json +++ b/test_result/deeplearning/dl-model-lenet-benchmark.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-05-26T21:08:36+00:00", + "date": "2025-06-01T09:39:21+00:00", "host_name": "4ed4bacfe45d", "executable": "./dl-model-lenet-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.36816,1.44824,2.24854], + "load_avg": [2.00293,3.05371,4.85986], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4396, - "real_time": 1.5520746374775649e-01, - "cpu_time": 1.5520622338489537e-01, + "iterations": 4427, + "real_time": 1.6056818065873274e-01, + "cpu_time": 1.6056101242376328e-01, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5074, - "real_time": 1.3665612941188610e-01, - "cpu_time": 1.3663077552227040e-01, + "iterations": 5106, + "real_time": 1.3631909335658823e-01, + "cpu_time": 1.3631605327066199e-01, "time_unit": "ms" } ] diff --git a/test_result/deeplearning/dl-model-lenet-benchmark.log b/test_result/deeplearning/dl-model-lenet-benchmark.log old mode 100755 new mode 100644 index add3c123..dd5e81a1 --- a/test_result/deeplearning/dl-model-lenet-benchmark.log +++ b/test_result/deeplearning/dl-model-lenet-benchmark.log 
@@ -1,4 +1,4 @@ -2025-05-26T21:08:36+00:00 +2025-06-01T09:39:21+00:00 Running ./dl-model-lenet-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.37, 1.45, 2.25 +Load Average: 2.00, 3.05, 4.86 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. ----------------------------------------------------------------------------- Benchmark Time CPU Iterations ----------------------------------------------------------------------------- -DL_MODEL_LENET/Auto_Vectorization 0.155 ms 0.155 ms 4396 -DL_MODEL_LENET/Buddy_Vectorization 0.137 ms 0.137 ms 5074 +DL_MODEL_LENET/Auto_Vectorization 0.161 ms 0.161 ms 4427 +DL_MODEL_LENET/Buddy_Vectorization 0.136 ms 0.136 ms 5106 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/test_result/deeplearning/dl-model-mobilenetv3-benchmark.json b/test_result/deeplearning/dl-model-mobilenetv3-benchmark.json old mode 100755 new mode 100644 index 24f46507..e255bf23 --- a/test_result/deeplearning/dl-model-mobilenetv3-benchmark.json +++ b/test_result/deeplearning/dl-model-mobilenetv3-benchmark.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-05-26T21:08:34+00:00", + "date": "2025-06-01T09:39:18+00:00", "host_name": "4ed4bacfe45d", "executable": "./dl-model-mobilenetv3-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.40039,1.45605,2.25537], + "load_avg": [2.00293,3.05371,4.85986], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 20, - "real_time": 3.5308412462472916e+01, - "cpu_time": 3.5303849899999996e+01, + "iterations": 18, + "real_time": 3.5876018926501274e+01, + "cpu_time": 3.5873441388888892e+01, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, 
"repetition_index": 0, "threads": 1, - "iterations": 21, - "real_time": 3.2171089379560378e+01, - "cpu_time": 3.2166612142857147e+01, + "iterations": 22, + "real_time": 3.2652140764350243e+01, + "cpu_time": 3.2651343363636357e+01, "time_unit": "ms" } ] diff --git a/test_result/deeplearning/dl-model-mobilenetv3-benchmark.log b/test_result/deeplearning/dl-model-mobilenetv3-benchmark.log old mode 100755 new mode 100644 index ef144411..f3872e7a --- a/test_result/deeplearning/dl-model-mobilenetv3-benchmark.log +++ b/test_result/deeplearning/dl-model-mobilenetv3-benchmark.log @@ -1,4 +1,4 @@ -2025-05-26T21:08:34+00:00 +2025-06-01T09:39:18+00:00 Running ./dl-model-mobilenetv3-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.40, 1.46, 2.26 +Load Average: 2.00, 3.05, 4.86 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
----------------------------------------------------------------------------------- Benchmark Time CPU Iterations ----------------------------------------------------------------------------------- -BM_MobileNet_V3/BM_MobileNet_V3_scalar 35.3 ms 35.3 ms 20 -BM_MobileNet_V3/BM_MobileNet_V3_conv_opt 32.2 ms 32.2 ms 21 +BM_MobileNet_V3/BM_MobileNet_V3_scalar 35.9 ms 35.9 ms 18 +BM_MobileNet_V3/BM_MobileNet_V3_conv_opt 32.7 ms 32.7 ms 22 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/test_result/deeplearning/dl-model-resnet18-benchmark.json b/test_result/deeplearning/dl-model-resnet18-benchmark.json old mode 100755 new mode 100644 index 2ed2dc2d..6b9417af --- a/test_result/deeplearning/dl-model-resnet18-benchmark.json +++ b/test_result/deeplearning/dl-model-resnet18-benchmark.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-05-26T21:12:31+00:00", + "date": "2025-06-01T09:43:14+00:00", "host_name": "4ed4bacfe45d", "executable": "./dl-model-resnet18-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.00439,1.19629,1.95947], + "load_avg": [1.02197,1.94287,3.99512], "library_build_type": "release" }, "benchmarks": [ @@ -46,8 +46,8 @@ "repetition_index": 0, "threads": 1, "iterations": 1, - "real_time": 7.2581797651946545e+02, - "cpu_time": 7.1790319600000009e+02, + "real_time": 7.1908619441092014e+02, + "cpu_time": 7.1777358900000013e+02, "time_unit": "ms" }, { @@ -60,8 +60,8 @@ "repetition_index": 0, "threads": 1, "iterations": 1, - "real_time": 7.2334924899041653e+02, - "cpu_time": 7.2326446299999998e+02, + "real_time": 7.2598466090857983e+02, + "cpu_time": 7.1803150299999993e+02, "time_unit": "ms" } ] diff --git a/test_result/deeplearning/dl-model-resnet18-benchmark.log b/test_result/deeplearning/dl-model-resnet18-benchmark.log old mode 100755 new mode 100644 index f3779a9b..2159c701 --- a/test_result/deeplearning/dl-model-resnet18-benchmark.log +++ 
b/test_result/deeplearning/dl-model-resnet18-benchmark.log @@ -1,4 +1,4 @@ -2025-05-26T21:12:31+00:00 +2025-06-01T09:43:14+00:00 Running ./dl-model-resnet18-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.20, 1.96 +Load Average: 1.02, 1.94, 4.00 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. -------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------- -DL_MODEL_Resnet18/Auto_Vectorization 726 ms 718 ms 1 -DL_MODEL_Resnet18/Buddy_Vectorization 723 ms 723 ms 1 +DL_MODEL_Resnet18/Auto_Vectorization 719 ms 718 ms 1 +DL_MODEL_Resnet18/Buddy_Vectorization 726 ms 718 ms 1 ----------------------------------------------------------- Correctness Verification: PASS ----------------------------------------------------------- diff --git a/test_result/deeplearning/dl-model-tinyllama-benchmark.json b/test_result/deeplearning/dl-model-tinyllama-benchmark.json old mode 100755 new mode 100644 index f04efe52..c746579b --- a/test_result/deeplearning/dl-model-tinyllama-benchmark.json +++ b/test_result/deeplearning/dl-model-tinyllama-benchmark.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-05-26T21:03:18+00:00", + "date": "2025-06-01T09:33:00+00:00", "host_name": "4ed4bacfe45d", "executable": "./dl-model-tinyllama-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.60303,2.09766,2.74219], + "load_avg": [2.75537,5.10107,6.17822], "library_build_type": "release" }, "benchmarks": [ @@ -46,8 +46,8 @@ "repetition_index": 0, "threads": 1, "iterations": 1, - "real_time": 1.4531263318471611e+05, - "cpu_time": 1.4530597402600001e+05, + "real_time": 1.7263756665587425e+05, + "cpu_time": 1.7263429748099999e+05, "time_unit": 
"ms" }, { @@ -60,8 +60,8 @@ "repetition_index": 0, "threads": 1, "iterations": 1, - "real_time": 9.8432251755148172e+03, - "cpu_time": 9.8427707820000032e+03, + "real_time": 1.0490998193621635e+04, + "cpu_time": 1.0490786658000019e+04, "time_unit": "ms" }, { @@ -74,8 +74,8 @@ "repetition_index": 0, "threads": 1, "iterations": 1, - "real_time": 7.8003611154854298e+03, - "cpu_time": 7.1571572710000164e+03, + "real_time": 8.2188854273408651e+03, + "cpu_time": 7.6072349719999866e+03, "time_unit": "ms" } ] diff --git a/test_result/deeplearning/dl-model-tinyllama-benchmark.log b/test_result/deeplearning/dl-model-tinyllama-benchmark.log old mode 100755 new mode 100644 index 026219f6..f5e906ed --- a/test_result/deeplearning/dl-model-tinyllama-benchmark.log +++ b/test_result/deeplearning/dl-model-tinyllama-benchmark.log @@ -1,4 +1,4 @@ -2025-05-26T21:03:18+00:00 +2025-06-01T09:33:00+00:00 Running ./dl-model-tinyllama-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,14 +6,14 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.60, 2.10, 2.74 +Load Average: 2.76, 5.10, 6.18 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
---------------------------------------------------------------------------- Benchmark Time CPU Iterations ---------------------------------------------------------------------------- -DL_MODEL_TINYLLAMA/scalar 145313 ms 145306 ms 1 -DL_MODEL_TINYLLAMA/matmul_opt 9843 ms 9843 ms 1 -DL_MODEL_TINYLLAMA/matmul_opt_omp 7800 ms 7157 ms 1 +DL_MODEL_TINYLLAMA/scalar 172638 ms 172634 ms 1 +DL_MODEL_TINYLLAMA/matmul_opt 10491 ms 10491 ms 1 +DL_MODEL_TINYLLAMA/matmul_opt_omp 8219 ms 7607 ms 1 ---------- Verification ---------- matmul_opt PASS matmul_opt_omp PASS diff --git a/test_result/deeplearning/dl-model-whisper-benchmark.json b/test_result/deeplearning/dl-model-whisper-benchmark.json old mode 100755 new mode 100644 index c1f18f9c..51a20002 --- a/test_result/deeplearning/dl-model-whisper-benchmark.json +++ b/test_result/deeplearning/dl-model-whisper-benchmark.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-05-26T21:08:38+00:00", + "date": "2025-06-01T09:39:22+00:00", "host_name": "4ed4bacfe45d", "executable": "./dl-model-whisper-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.36816,1.44824,2.24854], + "load_avg": [2.00244,3.03613,4.84424], "library_build_type": "release" }, "benchmarks": [ @@ -46,8 +46,8 @@ "repetition_index": 0, "threads": 1, "iterations": 1, - "real_time": 8.0864415192976594e+04, - "cpu_time": 8.0855295398000002e+04, + "real_time": 7.9215591410174966e+04, + "cpu_time": 7.9212764204999999e+04, "time_unit": "ms" }, { @@ -60,8 +60,8 @@ "repetition_index": 0, "threads": 1, "iterations": 1, - "real_time": 3.5875804258510470e+04, - "cpu_time": 3.5871486203000000e+04, + "real_time": 3.6910132765769958e+04, + "cpu_time": 3.6904499842000005e+04, "time_unit": "ms" } ] diff --git a/test_result/deeplearning/dl-model-whisper-benchmark.log b/test_result/deeplearning/dl-model-whisper-benchmark.log old mode 100755 new mode 100644 index 70acfa9b..2f896336 --- a/test_result/deeplearning/dl-model-whisper-benchmark.log +++ 
b/test_result/deeplearning/dl-model-whisper-benchmark.log @@ -1,4 +1,4 @@ -2025-05-26T21:08:38+00:00 +2025-06-01T09:39:22+00:00 Running ./dl-model-whisper-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.37, 1.45, 2.25 +Load Average: 2.00, 3.04, 4.84 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. ------------------------------------------------------------------------------- Benchmark Time CPU Iterations ------------------------------------------------------------------------------- -DL_MODEL_Whisper/Auto_Vectorization 80864 ms 80855 ms 1 -DL_MODEL_Whisper/Buddy_Vectorization 35876 ms 35871 ms 1 +DL_MODEL_Whisper/Auto_Vectorization 79216 ms 79213 ms 1 +DL_MODEL_Whisper/Buddy_Vectorization 36910 ms 36904 ms 1 ----------------------------------------------------------- Correctness Verification for Output1: PASS Correctness Verification for Output2: FAIL diff --git a/test_result/deeplearning/dl-op-linalg-arithaddf-benchmark.json b/test_result/deeplearning/dl-op-linalg-arithaddf-benchmark.json old mode 100755 new mode 100644 index 1fb664ee..8c844c94 --- a/test_result/deeplearning/dl-op-linalg-arithaddf-benchmark.json +++ b/test_result/deeplearning/dl-op-linalg-arithaddf-benchmark.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-05-26T21:13:08+00:00", + "date": "2025-06-01T09:43:50+00:00", "host_name": "4ed4bacfe45d", "executable": "./dl-op-linalg-arithaddf-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.00098,1.17285,1.92188], + "load_avg": [1.01074,1.8374,3.88281], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 23951, - "real_time": 2.9350469685000002e-02, - "cpu_time": 2.9346575633585240e-02, + "iterations": 23357, + "real_time": 
2.9862665965978977e-02, + "cpu_time": 2.9862255341011262e-02, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 174606, - "real_time": 4.2519020574317591e-03, - "cpu_time": 4.2513099836202651e-03, + "iterations": 164695, + "real_time": 3.9880004303424737e-03, + "cpu_time": 3.9879427426454967e-03, "time_unit": "ms" } ] diff --git a/test_result/deeplearning/dl-op-linalg-arithaddf-benchmark.log b/test_result/deeplearning/dl-op-linalg-arithaddf-benchmark.log old mode 100755 new mode 100644 index 01107b96..bc68b6b2 --- a/test_result/deeplearning/dl-op-linalg-arithaddf-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-arithaddf-benchmark.log @@ -1,4 +1,4 @@ -2025-05-26T21:13:08+00:00 +2025-06-01T09:43:50+00:00 Running ./dl-op-linalg-arithaddf-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.17, 1.92 +Load Average: 1.01, 1.84, 3.88 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
-------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------- -BM_ADDF_SCALAR 0.029 ms 0.029 ms 23951 -BM_ADDF_AutoVectorization 0.004 ms 0.004 ms 174606 +BM_ADDF_SCALAR 0.030 ms 0.030 ms 23357 +BM_ADDF_AutoVectorization 0.004 ms 0.004 ms 164695 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/test_result/deeplearning/dl-op-linalg-arithdivf-benchmark.json b/test_result/deeplearning/dl-op-linalg-arithdivf-benchmark.json old mode 100755 new mode 100644 index 3d9d66cb..72f7533b --- a/test_result/deeplearning/dl-op-linalg-arithdivf-benchmark.json +++ b/test_result/deeplearning/dl-op-linalg-arithdivf-benchmark.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-05-26T21:13:11+00:00", + "date": "2025-06-01T09:43:53+00:00", "host_name": "4ed4bacfe45d", "executable": "./dl-op-linalg-arithdivf-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.00049,1.16992,1.9165], + "load_avg": [1.00977,1.82324,3.86719], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 22508, - "real_time": 2.9681050269122190e-02, - "cpu_time": 2.9680807579527284e-02, + "iterations": 23918, + "real_time": 2.9331494164173792e-02, + "cpu_time": 2.9330557111798643e-02, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 73818, - "real_time": 9.6117635958387036e-03, - "cpu_time": 9.6116319325909687e-03, + "iterations": 73794, + "real_time": 1.0297565909804414e-02, + "cpu_time": 1.0297454332330543e-02, "time_unit": "ms" } ] diff --git a/test_result/deeplearning/dl-op-linalg-arithdivf-benchmark.log b/test_result/deeplearning/dl-op-linalg-arithdivf-benchmark.log old mode 100755 new mode 100644 index 06005493..87c0b507 --- 
a/test_result/deeplearning/dl-op-linalg-arithdivf-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-arithdivf-benchmark.log @@ -1,4 +1,4 @@ -2025-05-26T21:13:11+00:00 +2025-06-01T09:43:53+00:00 Running ./dl-op-linalg-arithdivf-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.17, 1.92 +Load Average: 1.01, 1.82, 3.87 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. -------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------- -BM_DIVF_SCALAR 0.030 ms 0.030 ms 22508 -BM_DIVF_AutoVectorization 0.010 ms 0.010 ms 73818 +BM_DIVF_SCALAR 0.029 ms 0.029 ms 23918 +BM_DIVF_AutoVectorization 0.010 ms 0.010 ms 73794 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/test_result/deeplearning/dl-op-linalg-arithmulf-benchmark.json b/test_result/deeplearning/dl-op-linalg-arithmulf-benchmark.json old mode 100755 new mode 100644 index 922f1953..28b9d220 --- a/test_result/deeplearning/dl-op-linalg-arithmulf-benchmark.json +++ b/test_result/deeplearning/dl-op-linalg-arithmulf-benchmark.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-05-26T21:13:12+00:00", + "date": "2025-06-01T09:43:54+00:00", "host_name": "4ed4bacfe45d", "executable": "./dl-op-linalg-arithmulf-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.00049,1.16992,1.9165], + "load_avg": [1.00977,1.82324,3.86719], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 23392, - "real_time": 2.9420993729111028e-02, - "cpu_time": 2.9417276718536250e-02, + "iterations": 23548, + "real_time": 2.9561160227525248e-02, + "cpu_time": 
2.9560279938848310e-02, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 175155, - "real_time": 3.9999952532304437e-03, - "cpu_time": 3.9999198538437381e-03, + "iterations": 146698, + "real_time": 3.9905801919875427e-03, + "cpu_time": 3.9904828423018707e-03, "time_unit": "ms" } ] diff --git a/test_result/deeplearning/dl-op-linalg-arithmulf-benchmark.log b/test_result/deeplearning/dl-op-linalg-arithmulf-benchmark.log old mode 100755 new mode 100644 index aa469118..16998437 --- a/test_result/deeplearning/dl-op-linalg-arithmulf-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-arithmulf-benchmark.log @@ -1,4 +1,4 @@ -2025-05-26T21:13:12+00:00 +2025-06-01T09:43:54+00:00 Running ./dl-op-linalg-arithmulf-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.17, 1.92 +Load Average: 1.01, 1.82, 3.87 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
-------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------- -BM_MULF_SCALAR 0.029 ms 0.029 ms 23392 -BM_MULF_AutoVectorization 0.004 ms 0.004 ms 175155 +BM_MULF_SCALAR 0.030 ms 0.030 ms 23548 +BM_MULF_AutoVectorization 0.004 ms 0.004 ms 146698 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/test_result/deeplearning/dl-op-linalg-arithnegf-benchmark.json b/test_result/deeplearning/dl-op-linalg-arithnegf-benchmark.json old mode 100755 new mode 100644 index 6ad1caf3..ce4296db --- a/test_result/deeplearning/dl-op-linalg-arithnegf-benchmark.json +++ b/test_result/deeplearning/dl-op-linalg-arithnegf-benchmark.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-05-26T21:13:15+00:00", + "date": "2025-06-01T09:43:57+00:00", "host_name": "4ed4bacfe45d", "executable": "./dl-op-linalg-arithnegf-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.00049,1.16992,1.9165], + "load_avg": [1.00879,1.80957,3.85156], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 30765, - "real_time": 2.2786648212342501e-02, - "cpu_time": 2.2786353388590929e-02, + "iterations": 30522, + "real_time": 2.2840192988802945e-02, + "cpu_time": 2.2839767708538104e-02, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 290149, - "real_time": 2.4227396805301059e-03, - "cpu_time": 2.4227158287638420e-03, + "iterations": 279150, + "real_time": 2.4855299081273687e-03, + "cpu_time": 2.4854867562242519e-03, "time_unit": "ms" } ] diff --git a/test_result/deeplearning/dl-op-linalg-arithnegf-benchmark.log b/test_result/deeplearning/dl-op-linalg-arithnegf-benchmark.log old mode 100755 new mode 100644 index 532f64c3..069ba6d3 --- 
a/test_result/deeplearning/dl-op-linalg-arithnegf-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-arithnegf-benchmark.log @@ -1,4 +1,4 @@ -2025-05-26T21:13:15+00:00 +2025-06-01T09:43:57+00:00 Running ./dl-op-linalg-arithnegf-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.17, 1.92 +Load Average: 1.01, 1.81, 3.85 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. -------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------- -BM_NEGF_SCALAR 0.023 ms 0.023 ms 30765 -BM_NEGF_AutoVectorization 0.002 ms 0.002 ms 290149 +BM_NEGF_SCALAR 0.023 ms 0.023 ms 30522 +BM_NEGF_AutoVectorization 0.002 ms 0.002 ms 279150 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/test_result/deeplearning/dl-op-linalg-arithsubf-benchmark.json b/test_result/deeplearning/dl-op-linalg-arithsubf-benchmark.json old mode 100755 new mode 100644 index 4be3ebde..38f62ff3 --- a/test_result/deeplearning/dl-op-linalg-arithsubf-benchmark.json +++ b/test_result/deeplearning/dl-op-linalg-arithsubf-benchmark.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-05-26T21:13:17+00:00", + "date": "2025-06-01T09:43:59+00:00", "host_name": "4ed4bacfe45d", "executable": "./dl-op-linalg-arithsubf-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1,1.16699,1.91113], + "load_avg": [1.00879,1.80957,3.85156], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 23979, - "real_time": 2.9514157302017990e-02, - "cpu_time": 2.9510655823845863e-02, + "iterations": 23583, + "real_time": 2.9263793471199923e-02, + "cpu_time": 
2.9262871305601495e-02, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 175235, - "real_time": 4.0285944484571151e-03, - "cpu_time": 4.0285024795274904e-03, + "iterations": 175569, + "real_time": 3.9921569896972341e-03, + "cpu_time": 3.9921202718019690e-03, "time_unit": "ms" } ] diff --git a/test_result/deeplearning/dl-op-linalg-arithsubf-benchmark.log b/test_result/deeplearning/dl-op-linalg-arithsubf-benchmark.log old mode 100755 new mode 100644 index 27264f4f..4c1f090c --- a/test_result/deeplearning/dl-op-linalg-arithsubf-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-arithsubf-benchmark.log @@ -1,4 +1,4 @@ -2025-05-26T21:13:17+00:00 +2025-06-01T09:43:59+00:00 Running ./dl-op-linalg-arithsubf-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.17, 1.91 +Load Average: 1.01, 1.81, 3.85 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
-------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------- -BM_SUBF_SCALAR 0.030 ms 0.030 ms 23979 -BM_SUBF_AutoVectorization 0.004 ms 0.004 ms 175235 +BM_SUBF_SCALAR 0.029 ms 0.029 ms 23583 +BM_SUBF_AutoVectorization 0.004 ms 0.004 ms 175569 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/test_result/deeplearning/dl-op-linalg-batch-matmul-benchmark.json b/test_result/deeplearning/dl-op-linalg-batch-matmul-benchmark.json old mode 100755 new mode 100644 index 6fb82eda..3cc18461 --- a/test_result/deeplearning/dl-op-linalg-batch-matmul-benchmark.json +++ b/test_result/deeplearning/dl-op-linalg-batch-matmul-benchmark.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-05-26T21:12:58+00:00", + "date": "2025-06-01T09:43:41+00:00", "host_name": "4ed4bacfe45d", "executable": "./dl-op-linalg-batch-matmul-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.00195,1.1792,1.93262], + "load_avg": [1.01367,1.86621,3.91455], "library_build_type": "release" }, "benchmarks": [ @@ -46,8 +46,8 @@ "repetition_index": 0, "threads": 1, "iterations": 1, - "real_time": 3.5513852797448635e+03, - "cpu_time": 3.5510254610000002e+03, + "real_time": 3.5291669797152281e+03, + "cpu_time": 3.5290844250000000e+03, "time_unit": "ms" }, { @@ -60,8 +60,8 @@ "repetition_index": 0, "threads": 1, "iterations": 1, - "real_time": 1.0021720249205828e+03, - "cpu_time": 1.0020681910000002e+03, + "real_time": 9.7394227981567383e+02, + "cpu_time": 9.7389873400000045e+02, "time_unit": "ms" }, { @@ -74,8 +74,8 @@ "repetition_index": 0, "threads": 1, "iterations": 1, - "real_time": 1.9173206947743893e+02, - "cpu_time": 1.9172282700000042e+02, + "real_time": 1.9066828303039074e+02, + "cpu_time": 1.9066431600000033e+02, "time_unit": "ms" }, { @@ -88,8 +88,8 @@ "repetition_index": 0, "threads": 1, 
"iterations": 1, - "real_time": 1.0962291248142719e+02, - "cpu_time": 1.0962218999999962e+02, + "real_time": 1.0904773883521557e+02, + "cpu_time": 1.0903675299999982e+02, "time_unit": "ms" }, { @@ -102,8 +102,8 @@ "repetition_index": 0, "threads": 1, "iterations": 1, - "real_time": 1.1728629097342491e+02, - "cpu_time": 1.1727965299999977e+02, + "real_time": 1.1676310375332832e+02, + "cpu_time": 1.1675743199999999e+02, "time_unit": "ms" }, { @@ -116,8 +116,8 @@ "repetition_index": 0, "threads": 1, "iterations": 1, - "real_time": 3.5626054368913174e+02, - "cpu_time": 3.5615294499999982e+02, + "real_time": 3.5137140378355980e+02, + "cpu_time": 3.5136892199999983e+02, "time_unit": "ms" }, { @@ -130,8 +130,8 @@ "repetition_index": 0, "threads": 1, "iterations": 1, - "real_time": 7.9095944762229919e+01, - "cpu_time": 3.1461689000000348e+01, + "real_time": 8.0572975799441338e+01, + "cpu_time": 3.2125255999999602e+01, "time_unit": "ms" } ] diff --git a/test_result/deeplearning/dl-op-linalg-batch-matmul-benchmark.log b/test_result/deeplearning/dl-op-linalg-batch-matmul-benchmark.log old mode 100755 new mode 100644 index 7dc25b0e..869f52c6 --- a/test_result/deeplearning/dl-op-linalg-batch-matmul-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-batch-matmul-benchmark.log @@ -1,4 +1,4 @@ -2025-05-26T21:12:58+00:00 +2025-06-01T09:43:41+00:00 Running ./dl-op-linalg-batch-matmul-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,18 +6,18 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.18, 1.93 +Load Average: 1.01, 1.87, 3.91 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
--------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations --------------------------------------------------------------------------------------------- -DL_OPS_BATCH_MATMUL/Scalar/iterations:1 3551 ms 3551 ms 1 -DL_OPS_BATCH_MATMUL/AutoVectorization/iterations:1 1002 ms 1002 ms 1 -DL_OPS_BATCH_MATMUL/Vectorization/iterations:1 192 ms 192 ms 1 -DL_OPS_BATCH_MATMUL/Tile/iterations:1 110 ms 110 ms 1 +DL_OPS_BATCH_MATMUL/Scalar/iterations:1 3529 ms 3529 ms 1 +DL_OPS_BATCH_MATMUL/AutoVectorization/iterations:1 974 ms 974 ms 1 +DL_OPS_BATCH_MATMUL/Vectorization/iterations:1 191 ms 191 ms 1 +DL_OPS_BATCH_MATMUL/Tile/iterations:1 109 ms 109 ms 1 DL_OPS_BATCH_MATMUL/SCF/iterations:1 117 ms 117 ms 1 -DL_OPS_BATCH_MATMUL/BROADCAST/iterations:1 356 ms 356 ms 1 -DL_OPS_BATCH_MATMUL/BROADCAST_OMP/iterations:1 79.1 ms 31.5 ms 1 +DL_OPS_BATCH_MATMUL/BROADCAST/iterations:1 351 ms 351 ms 1 +DL_OPS_BATCH_MATMUL/BROADCAST_OMP/iterations:1 80.6 ms 32.1 ms 1 ---------- Verification ---------- Tile PASS SCF PASS diff --git a/test_result/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.json b/test_result/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.json old mode 100755 new mode 100644 index 6af577a1..3ea50b39 --- a/test_result/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.json +++ b/test_result/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-05-26T21:12:52+00:00", + "date": "2025-06-01T09:43:34+00:00", "host_name": "4ed4bacfe45d", "executable": "./dl-op-linalg-conv2d-nchw-fchw-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.00244,1.18262,1.93799], + "load_avg": [1.01514,1.88086,3.93066], "library_build_type": "release" }, "benchmarks": [ @@ -46,8 +46,8 @@ "repetition_index": 0, "threads": 1, "iterations": 2, - "real_time": 2.8201012406498194e+02, - "cpu_time": 2.8196062250000000e+02, + "real_time": 
2.8255500365048647e+02, + "cpu_time": 2.8254993450000001e+02, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 62, - "real_time": 1.1751845959694155e+01, - "cpu_time": 1.1751463612903226e+01, + "iterations": 69, + "real_time": 1.0128497186562289e+01, + "cpu_time": 1.0128337550724638e+01, "time_unit": "ms" } ] diff --git a/test_result/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.log b/test_result/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.log old mode 100755 new mode 100644 index 8fb49caf..5b0748ec --- a/test_result/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.log @@ -1,4 +1,4 @@ -2025-05-26T21:12:52+00:00 +2025-06-01T09:43:34+00:00 Running ./dl-op-linalg-conv2d-nchw-fchw-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.18, 1.94 +Load Average: 1.02, 1.88, 3.93 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
------------------------------------------------------------------- Benchmark Time CPU Iterations ------------------------------------------------------------------- -BM_Conv2DNchwFchw_SCALAR 282 ms 282 ms 2 -BM_Conv2DNchwFchw_Im2col 11.8 ms 11.8 ms 62 +BM_Conv2DNchwFchw_SCALAR 283 ms 283 ms 2 +BM_Conv2DNchwFchw_Im2col 10.1 ms 10.1 ms 69 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json b/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json old mode 100755 new mode 100644 index 3d653d03..67eb81ab --- a/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json +++ b/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-05-26T21:12:56+00:00", + "date": "2025-06-01T09:43:38+00:00", "host_name": "4ed4bacfe45d", "executable": "./dl-op-linalg-conv2d-nhwc-fhwc-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.00195,1.1792,1.93262], + "load_avg": [1.01367,1.86621,3.91455], "library_build_type": "release" }, "benchmarks": [ @@ -46,8 +46,8 @@ "repetition_index": 0, "threads": 1, "iterations": 5, - "real_time": 7.3650452867150307e+01, - "cpu_time": 7.3648638199999994e+01, + "real_time": 7.2542962804436684e+01, + "cpu_time": 7.2539947599999991e+01, "time_unit": "ms" }, { @@ -60,8 +60,8 @@ "repetition_index": 0, "threads": 1, "iterations": 5, - "real_time": 9.3432668596506119e+00, - "cpu_time": 9.3431779999999964e+00, + "real_time": 9.3459729105234146e+00, + "cpu_time": 9.3459027999999940e+00, "time_unit": "ms" }, { @@ -74,8 +74,8 @@ "repetition_index": 0, "threads": 1, "iterations": 5, - "real_time": 1.7432916909456253e+00, - "cpu_time": 1.7432738000000003e+00, + "real_time": 1.7330713570117950e+00, + "cpu_time": 1.7330618000000020e+00, "time_unit": "ms" }, { @@ -88,8 +88,8 @@ "repetition_index": 0, "threads": 1, 
"iterations": 5, - "real_time": 1.7292745411396027e+00, - "cpu_time": 1.7292597999999937e+00, + "real_time": 1.7253447324037552e+00, + "cpu_time": 1.7253329999999956e+00, "time_unit": "ms" } ] diff --git a/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.log b/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.log old mode 100755 new mode 100644 index a539bbd2..d2f0e099 --- a/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.log @@ -1,4 +1,4 @@ -2025-05-26T21:12:56+00:00 +2025-06-01T09:43:38+00:00 Running ./dl-op-linalg-conv2d-nhwc-fhwc-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,14 +6,14 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.18, 1.93 +Load Average: 1.01, 1.87, 3.91 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
--------------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations --------------------------------------------------------------------------------------------------- -DL_OPS_CONV_2D_NHWC_FHWC/scalar/iterations:5 73.7 ms 73.6 ms 5 -DL_OPS_CONV_2D_NHWC_FHWC/auto_vectorization/iterations:5 9.34 ms 9.34 ms 5 -DL_OPS_CONV_2D_NHWC_FHWC/vectorization/iterations:5 1.74 ms 1.74 ms 5 +DL_OPS_CONV_2D_NHWC_FHWC/scalar/iterations:5 72.5 ms 72.5 ms 5 +DL_OPS_CONV_2D_NHWC_FHWC/auto_vectorization/iterations:5 9.35 ms 9.35 ms 5 +DL_OPS_CONV_2D_NHWC_FHWC/vectorization/iterations:5 1.73 ms 1.73 ms 5 DL_OPS_CONV_2D_NHWC_FHWC/vec_tile/iterations:5 1.73 ms 1.73 ms 5 ---------- Verification ---------- auto_vectorization PASS diff --git a/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json b/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json old mode 100755 new mode 100644 index fdaf6bed..02682d22 --- a/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json +++ b/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-05-26T21:12:54+00:00", + "date": "2025-06-01T09:43:36+00:00", "host_name": "4ed4bacfe45d", "executable": "./dl-op-linalg-conv2d-nhwc-hwcf-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.00244,1.18262,1.93799], + "load_avg": [1.01514,1.88086,3.93066], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 22, - "real_time": 3.2554571686143227e+01, - "cpu_time": 3.2554057500000006e+01, + "iterations": 21, + "real_time": 3.2353343175990240e+01, + "cpu_time": 3.2352136285714280e+01, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 115, - "real_time": 6.0713487960722139e+00, - "cpu_time": 6.0712789217391308e+00, + 
"iterations": 114, + "real_time": 6.1570262176948685e+00, + "cpu_time": 6.1568676929824564e+00, "time_unit": "ms" } ] diff --git a/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.log b/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.log old mode 100755 new mode 100644 index d9fe9718..0ee7bfdf --- a/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.log @@ -1,4 +1,4 @@ -2025-05-26T21:12:54+00:00 +2025-06-01T09:43:36+00:00 Running ./dl-op-linalg-conv2d-nhwc-hwcf-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.18, 1.94 +Load Average: 1.02, 1.88, 3.93 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. --------------------------------------------------------------------------------- Benchmark Time CPU Iterations --------------------------------------------------------------------------------- -BM_CONV_2D_NHWC_HWCF_SCALAR 32.6 ms 32.6 ms 22 -BM_CONV_2D_NHWC_HWCF_AutoVectorization 6.07 ms 6.07 ms 115 +BM_CONV_2D_NHWC_HWCF_SCALAR 32.4 ms 32.4 ms 21 +BM_CONV_2D_NHWC_HWCF_AutoVectorization 6.16 ms 6.16 ms 114 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/test_result/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json b/test_result/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json old mode 100755 new mode 100644 index 16a28530..482a1fac --- a/test_result/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json +++ b/test_result/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-05-26T21:12:56+00:00", + "date": "2025-06-01T09:43:38+00:00", "host_name": "4ed4bacfe45d", 
"executable": "./dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.00195,1.1792,1.93262], + "load_avg": [1.01367,1.86621,3.91455], "library_build_type": "release" }, "benchmarks": [ @@ -46,8 +46,8 @@ "repetition_index": 0, "threads": 1, "iterations": 5, - "real_time": 4.8418600112199783e+00, - "cpu_time": 4.8414392000000008e+00, + "real_time": 7.2686385363340378e+00, + "cpu_time": 7.2663935999999989e+00, "time_unit": "ms" }, { @@ -60,8 +60,8 @@ "repetition_index": 0, "threads": 1, "iterations": 5, - "real_time": 1.6818478703498840e+00, - "cpu_time": 1.6818200000000005e+00, + "real_time": 1.6840752214193344e+00, + "cpu_time": 1.6840592000000001e+00, "time_unit": "ms" }, { @@ -74,8 +74,8 @@ "repetition_index": 0, "threads": 1, "iterations": 5, - "real_time": 1.2000650167465210e-01, - "cpu_time": 1.2000399999999996e-01, + "real_time": 1.2709796428680420e-01, + "cpu_time": 1.2711200000000006e-01, "time_unit": "ms" } ] diff --git a/test_result/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.log b/test_result/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.log old mode 100755 new mode 100644 index 517bdee8..33593701 --- a/test_result/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.log @@ -1,4 +1,4 @@ -2025-05-26T21:12:56+00:00 +2025-06-01T09:43:38+00:00 Running ./dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,14 +6,14 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.18, 1.93 +Load Average: 1.01, 1.87, 3.91 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
------------------------------------------------------------------------------------------------------------ Benchmark Time CPU Iterations ------------------------------------------------------------------------------------------------------------ -DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/scalar/iterations:5 4.84 ms 4.84 ms 5 +DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/scalar/iterations:5 7.27 ms 7.27 ms 5 DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/auto_vectorization/iterations:5 1.68 ms 1.68 ms 5 -DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/vectorization/iterations:5 0.120 ms 0.120 ms 5 +DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/vectorization/iterations:5 0.127 ms 0.127 ms 5 ---------- Verification ---------- auto_vectorization PASS vectorization PASS diff --git a/test_result/deeplearning/dl-op-linalg-mathexp-benchmark.json b/test_result/deeplearning/dl-op-linalg-mathexp-benchmark.json old mode 100755 new mode 100644 index acb58c6e..d824a439 --- a/test_result/deeplearning/dl-op-linalg-mathexp-benchmark.json +++ b/test_result/deeplearning/dl-op-linalg-mathexp-benchmark.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-05-26T21:13:22+00:00", + "date": "2025-06-01T09:44:04+00:00", "host_name": "4ed4bacfe45d", "executable": "./dl-op-linalg-mathexp-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.08008,1.18115,1.91162], + "load_avg": [1.00781,1.7959,3.83594], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 15289, - "real_time": 4.5686233631569172e-02, - "cpu_time": 4.5684854862973377e-02, + "iterations": 15072, + "real_time": 4.5585291086441014e-02, + "cpu_time": 4.5584231754246279e-02, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 22195, - "real_time": 3.1881006680031194e-02, - "cpu_time": 3.1880129173237209e-02, + "iterations": 22245, + "real_time": 3.1635766251485356e-02, + "cpu_time": 3.1635336165430433e-02, 
"time_unit": "ms" } ] diff --git a/test_result/deeplearning/dl-op-linalg-mathexp-benchmark.log b/test_result/deeplearning/dl-op-linalg-mathexp-benchmark.log old mode 100755 new mode 100644 index 67453461..bdfaa89a --- a/test_result/deeplearning/dl-op-linalg-mathexp-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-mathexp-benchmark.log @@ -1,4 +1,4 @@ -2025-05-26T21:13:22+00:00 +2025-06-01T09:44:04+00:00 Running ./dl-op-linalg-mathexp-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.08, 1.18, 1.91 +Load Average: 1.01, 1.80, 3.84 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. ------------------------------------------------------------------- Benchmark Time CPU Iterations ------------------------------------------------------------------- -BM_EXP_SCALAR 0.046 ms 0.046 ms 15289 -BM_EXP_AutoVectorization 0.032 ms 0.032 ms 22195 +BM_EXP_SCALAR 0.046 ms 0.046 ms 15072 +BM_EXP_AutoVectorization 0.032 ms 0.032 ms 22245 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/test_result/deeplearning/dl-op-linalg-mathfpow-benchmark.json b/test_result/deeplearning/dl-op-linalg-mathfpow-benchmark.json old mode 100755 new mode 100644 index c843a4ca..c3598f2a --- a/test_result/deeplearning/dl-op-linalg-mathfpow-benchmark.json +++ b/test_result/deeplearning/dl-op-linalg-mathfpow-benchmark.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-05-26T21:13:19+00:00", + "date": "2025-06-01T09:44:01+00:00", "host_name": "4ed4bacfe45d", "executable": "./dl-op-linalg-mathfpow-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1,1.16699,1.91113], + "load_avg": [1.00879,1.80957,3.85156], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, 
"repetition_index": 0, "threads": 1, - "iterations": 8096, - "real_time": 8.3391365306194232e-02, - "cpu_time": 8.3389921689723326e-02, + "iterations": 8120, + "real_time": 8.5159794432042274e-02, + "cpu_time": 8.5155036699507400e-02, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 12303, - "real_time": 5.7476819394846557e-02, - "cpu_time": 5.7475786474843533e-02, + "iterations": 12142, + "real_time": 5.6896000527418576e-02, + "cpu_time": 5.6895375061769053e-02, "time_unit": "ms" } ] diff --git a/test_result/deeplearning/dl-op-linalg-mathfpow-benchmark.log b/test_result/deeplearning/dl-op-linalg-mathfpow-benchmark.log old mode 100755 new mode 100644 index ca5c13f0..96d084ae --- a/test_result/deeplearning/dl-op-linalg-mathfpow-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-mathfpow-benchmark.log @@ -1,4 +1,4 @@ -2025-05-26T21:13:19+00:00 +2025-06-01T09:44:01+00:00 Running ./dl-op-linalg-mathfpow-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.17, 1.91 +Load Average: 1.01, 1.81, 3.85 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
-------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------- -BM_FPOW_SCALAR 0.083 ms 0.083 ms 8096 -BM_FPOW_AutoVectorization 0.057 ms 0.057 ms 12303 +BM_FPOW_SCALAR 0.085 ms 0.085 ms 8120 +BM_FPOW_AutoVectorization 0.057 ms 0.057 ms 12142 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/test_result/deeplearning/dl-op-linalg-mathrsqrt-benchmark.json b/test_result/deeplearning/dl-op-linalg-mathrsqrt-benchmark.json old mode 100755 new mode 100644 index a1577c5f..8eaf98ee --- a/test_result/deeplearning/dl-op-linalg-mathrsqrt-benchmark.json +++ b/test_result/deeplearning/dl-op-linalg-mathrsqrt-benchmark.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-05-26T21:13:20+00:00", + "date": "2025-06-01T09:44:02+00:00", "host_name": "4ed4bacfe45d", "executable": "./dl-op-linalg-mathrsqrt-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.08008,1.18115,1.91162], + "load_avg": [1.00781,1.7959,3.83594], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 9407, - "real_time": 7.2711996517919431e-02, - "cpu_time": 7.2711034548740297e-02, + "iterations": 9624, + "real_time": 7.2778359749172489e-02, + "cpu_time": 7.2776962281795518e-02, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 161010, - "real_time": 4.3512647488525760e-03, - "cpu_time": 4.3511522389913680e-03, + "iterations": 160866, + "real_time": 4.3446532387546302e-03, + "cpu_time": 4.3446116705829702e-03, "time_unit": "ms" } ] diff --git a/test_result/deeplearning/dl-op-linalg-mathrsqrt-benchmark.log b/test_result/deeplearning/dl-op-linalg-mathrsqrt-benchmark.log old mode 100755 new mode 100644 index c1254311..d95e5fc4 --- 
a/test_result/deeplearning/dl-op-linalg-mathrsqrt-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-mathrsqrt-benchmark.log @@ -1,4 +1,4 @@ -2025-05-26T21:13:20+00:00 +2025-06-01T09:44:02+00:00 Running ./dl-op-linalg-mathrsqrt-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.08, 1.18, 1.91 +Load Average: 1.01, 1.80, 3.84 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. --------------------------------------------------------------------- Benchmark Time CPU Iterations --------------------------------------------------------------------- -BM_RSQRT_SCALAR 0.073 ms 0.073 ms 9407 -BM_RSQRT_AutoVectorization 0.004 ms 0.004 ms 161010 +BM_RSQRT_SCALAR 0.073 ms 0.073 ms 9624 +BM_RSQRT_AutoVectorization 0.004 ms 0.004 ms 160866 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/test_result/deeplearning/dl-op-linalg-matmul-benchmark.json b/test_result/deeplearning/dl-op-linalg-matmul-benchmark.json old mode 100755 new mode 100644 index c7d1939e..1683436c --- a/test_result/deeplearning/dl-op-linalg-matmul-benchmark.json +++ b/test_result/deeplearning/dl-op-linalg-matmul-benchmark.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-05-26T21:12:40+00:00", + "date": "2025-06-01T09:43:23+00:00", "host_name": "4ed4bacfe45d", "executable": "./dl-op-linalg-matmul-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.00342,1.18945,1.94873], + "load_avg": [1.01807,1.91113,3.96289], "library_build_type": "release" }, "benchmarks": [ @@ -46,8 +46,8 @@ "repetition_index": 0, "threads": 1, "iterations": 1, - "real_time": 4.2198417354375124e+03, - "cpu_time": 4.2197150860000011e+03, + "real_time": 4.0962782036513090e+03, + "cpu_time": 4.0961748020000005e+03, "time_unit": "ms" }, { @@ 
-60,8 +60,8 @@ "repetition_index": 0, "threads": 1, "iterations": 1, - "real_time": 3.3937856554985046e+03, - "cpu_time": 3.3936371459999996e+03, + "real_time": 3.1237379759550095e+03, + "cpu_time": 3.1235837540000002e+03, "time_unit": "ms" }, { @@ -74,8 +74,8 @@ "repetition_index": 0, "threads": 1, "iterations": 1, - "real_time": 1.1714999191462994e+02, - "cpu_time": 1.1714971299999988e+02, + "real_time": 1.0998316481709480e+02, + "cpu_time": 1.0997834299999987e+02, "time_unit": "ms" }, { @@ -88,8 +88,8 @@ "repetition_index": 0, "threads": 1, "iterations": 1, - "real_time": 5.9898965060710907e+01, - "cpu_time": 5.9899150999999762e+01, + "real_time": 5.9036938473582268e+01, + "cpu_time": 5.9036986999999819e+01, "time_unit": "ms" }, { @@ -102,8 +102,8 @@ "repetition_index": 0, "threads": 1, "iterations": 1, - "real_time": 3.0501415953040123e+01, - "cpu_time": 9.7919029999999907e+00, + "real_time": 2.8818020597100258e+01, + "cpu_time": 1.0956099000000386e+01, "time_unit": "ms" } ] diff --git a/test_result/deeplearning/dl-op-linalg-matmul-benchmark.log b/test_result/deeplearning/dl-op-linalg-matmul-benchmark.log old mode 100755 new mode 100644 index 9d8f9029..574fb1d6 --- a/test_result/deeplearning/dl-op-linalg-matmul-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-matmul-benchmark.log @@ -1,4 +1,4 @@ -2025-05-26T21:12:40+00:00 +2025-06-01T09:43:23+00:00 Running ./dl-op-linalg-matmul-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,16 +6,16 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.19, 1.95 +Load Average: 1.02, 1.91, 3.96 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
------------------------------------------------------------------------------- Benchmark Time CPU Iterations ------------------------------------------------------------------------------- -DL_OPS_MATMUL/scalar_O0/iterations:1 4220 ms 4220 ms 1 -DL_OPS_MATMUL/scalar_O3/iterations:1 3394 ms 3394 ms 1 -DL_OPS_MATMUL/tile/iterations:1 117 ms 117 ms 1 -DL_OPS_MATMUL/vec/iterations:1 59.9 ms 59.9 ms 1 -DL_OPS_MATMUL/vec_omp/iterations:1 30.5 ms 9.79 ms 1 +DL_OPS_MATMUL/scalar_O0/iterations:1 4096 ms 4096 ms 1 +DL_OPS_MATMUL/scalar_O3/iterations:1 3124 ms 3124 ms 1 +DL_OPS_MATMUL/tile/iterations:1 110 ms 110 ms 1 +DL_OPS_MATMUL/vec/iterations:1 59.0 ms 59.0 ms 1 +DL_OPS_MATMUL/vec_omp/iterations:1 28.8 ms 11.0 ms 1 ---------- Verification ---------- tile PASS vec PASS diff --git a/test_result/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.json b/test_result/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.json old mode 100755 new mode 100644 index b4b37a1e..593d8cbf --- a/test_result/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.json +++ b/test_result/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-05-26T21:12:56+00:00", + "date": "2025-06-01T09:43:38+00:00", "host_name": "4ed4bacfe45d", "executable": "./dl-op-linalg-pooling-nhwc-sum-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.00195,1.1792,1.93262], + "load_avg": [1.01367,1.86621,3.91455], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2972, - "real_time": 2.3404261144943544e-01, - "cpu_time": 2.3401008546433380e-01, + "iterations": 2993, + "real_time": 2.3285509203832921e-01, + "cpu_time": 2.3285149949883063e-01, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 16865, - "real_time": 4.1462073481970008e-02, - "cpu_time": 4.1460686095463981e-02, + 
"iterations": 16954, + "real_time": 4.1371273263362526e-02, + "cpu_time": 4.1370382623569658e-02, "time_unit": "ms" } ] diff --git a/test_result/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.log b/test_result/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.log old mode 100755 new mode 100644 index fe01dc2e..9f3bd465 --- a/test_result/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.log @@ -1,4 +1,4 @@ -2025-05-26T21:12:56+00:00 +2025-06-01T09:43:38+00:00 Running ./dl-op-linalg-pooling-nhwc-sum-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.18, 1.93 +Load Average: 1.01, 1.87, 3.91 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. -------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------- -BM_POOLING_NHWC_SUM_SCALAR 0.234 ms 0.234 ms 2972 -BM_POOLING_NHWC_SUM_AutoVectorization 0.041 ms 0.041 ms 16865 +BM_POOLING_NHWC_SUM_SCALAR 0.233 ms 0.233 ms 2993 +BM_POOLING_NHWC_SUM_AutoVectorization 0.041 ms 0.041 ms 16954 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/test_result/deeplearning/dl-op-linalg-reduceaddf-benchmark.json b/test_result/deeplearning/dl-op-linalg-reduceaddf-benchmark.json new file mode 100644 index 00000000..2b2cb37f --- /dev/null +++ b/test_result/deeplearning/dl-op-linalg-reduceaddf-benchmark.json @@ -0,0 +1,38 @@ +{ + "context": { + "date": "2025-06-01T09:44:07+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./dl-op-linalg-reduceaddf-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": 
"Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [1.00684,1.78223,3.82031], + "library_build_type": "release" + }, + "benchmarks": [ diff --git a/test_result/deeplearning/dl-op-linalg-reduceaddf-benchmark.log b/test_result/deeplearning/dl-op-linalg-reduceaddf-benchmark.log old mode 100755 new mode 100644 index f25afa63..b4753f53 --- a/test_result/deeplearning/dl-op-linalg-reduceaddf-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-reduceaddf-benchmark.log @@ -1,4 +1,4 @@ -2025-05-26T21:13:25+00:00 +2025-06-01T09:44:07+00:00 Running ./dl-op-linalg-reduceaddf-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,5 +6,5 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.08, 1.18, 1.91 +Load Average: 1.01, 1.78, 3.82 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
diff --git a/test_result/deeplearning/dl-op-linalg-reducemaxf-benchmark.json b/test_result/deeplearning/dl-op-linalg-reducemaxf-benchmark.json new file mode 100644 index 00000000..029a8bb3 --- /dev/null +++ b/test_result/deeplearning/dl-op-linalg-reducemaxf-benchmark.json @@ -0,0 +1,38 @@ +{ + "context": { + "date": "2025-06-01T09:44:07+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./dl-op-linalg-reducemaxf-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [1.00684,1.78223,3.82031], + "library_build_type": "release" + }, + "benchmarks": [ diff --git a/test_result/deeplearning/dl-op-linalg-reducemaxf-benchmark.log b/test_result/deeplearning/dl-op-linalg-reducemaxf-benchmark.log old mode 100755 new mode 100644 index 42781e26..b3019785 --- a/test_result/deeplearning/dl-op-linalg-reducemaxf-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-reducemaxf-benchmark.log @@ -1,4 +1,4 @@ -2025-05-26T21:13:25+00:00 +2025-06-01T09:44:07+00:00 Running ./dl-op-linalg-reducemaxf-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,5 +6,5 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.08, 1.18, 1.91 +Load Average: 1.01, 1.78, 3.82 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
diff --git a/test_result/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.json b/test_result/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.json old mode 100755 new mode 100644 index a05381b8..08e311f7 --- a/test_result/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.json +++ b/test_result/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-05-26T21:13:25+00:00", + "date": "2025-06-01T09:44:07+00:00", "host_name": "4ed4bacfe45d", "executable": "./dl-op-linalg-softmax-exp-sum-div-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.08008,1.18115,1.91162], + "load_avg": [1.00684,1.78223,3.82031], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 125501, - "real_time": 5.5569801497239611e-03, - "cpu_time": 5.5562642688106074e-03, + "iterations": 123186, + "real_time": 5.5899351610014525e-03, + "cpu_time": 5.5898217898137786e-03, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 181845, - "real_time": 3.8459822164716873e-03, - "cpu_time": 3.8455111880997556e-03, + "iterations": 182176, + "real_time": 3.8475607200048833e-03, + "cpu_time": 3.8474830768048481e-03, "time_unit": "ms" } ] diff --git a/test_result/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.log b/test_result/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.log old mode 100755 new mode 100644 index 672556ed..914006d4 --- a/test_result/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.log @@ -1,4 +1,4 @@ -2025-05-26T21:13:25+00:00 +2025-06-01T09:44:07+00:00 Running ./dl-op-linalg-softmax-exp-sum-div-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 
KiB (x1) -Load Average: 1.08, 1.18, 1.91 +Load Average: 1.01, 1.78, 3.82 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. -------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------- -BM_SOFTMAXEXPSUMDIV_SCALAR 0.006 ms 0.006 ms 125501 -BM_SOFTMAXEXPSUMDIV_AutoVectorization 0.004 ms 0.004 ms 181845 +BM_SOFTMAXEXPSUMDIV_SCALAR 0.006 ms 0.006 ms 123186 +BM_SOFTMAXEXPSUMDIV_AutoVectorization 0.004 ms 0.004 ms 182176 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/test_result/deeplearning/dl-op-matmul-transpose-b-benchmark.json b/test_result/deeplearning/dl-op-matmul-transpose-b-benchmark.json old mode 100755 new mode 100644 index 79beaecd..dc45928e --- a/test_result/deeplearning/dl-op-matmul-transpose-b-benchmark.json +++ b/test_result/deeplearning/dl-op-matmul-transpose-b-benchmark.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-05-26T21:13:28+00:00", + "date": "2025-06-01T09:44:10+00:00", "host_name": "4ed4bacfe45d", "executable": "./dl-op-matmul-transpose-b-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.07324,1.17773,1.90625], + "load_avg": [1.00684,1.78223,3.82031], "library_build_type": "release" }, "benchmarks": [ @@ -46,8 +46,8 @@ "repetition_index": 0, "threads": 1, "iterations": 5, - "real_time": 1.2652646832168102e+03, - "cpu_time": 1.2636451276000000e+03, + "real_time": 1.0452035043388605e+03, + "cpu_time": 1.0435922366000000e+03, "time_unit": "ms" }, { @@ -60,8 +60,8 @@ "repetition_index": 0, "threads": 1, "iterations": 5, - "real_time": 3.2678271271288395e+02, - "cpu_time": 3.2677019700000011e+02, + "real_time": 2.7667955197393894e+02, + "cpu_time": 2.7666573640000001e+02, "time_unit": "ms" }, { @@ -74,8 +74,8 @@ "repetition_index": 0, 
"threads": 1, "iterations": 5, - "real_time": 3.0260816961526871e+01, - "cpu_time": 1.8804882999999961e+01, + "real_time": 3.0310654267668724e+01, + "cpu_time": 1.9424157000000086e+01, "time_unit": "ms" }, { @@ -88,8 +88,8 @@ "repetition_index": 0, "threads": 1, "iterations": 5, - "real_time": 8.3582005649805069e+01, - "cpu_time": 8.3579965600000250e+01, + "real_time": 8.4920790046453476e+01, + "cpu_time": 8.4914559799999978e+01, "time_unit": "ms" } ] diff --git a/test_result/deeplearning/dl-op-matmul-transpose-b-benchmark.log b/test_result/deeplearning/dl-op-matmul-transpose-b-benchmark.log old mode 100755 new mode 100644 index 0b18c23a..53ca4c7f --- a/test_result/deeplearning/dl-op-matmul-transpose-b-benchmark.log +++ b/test_result/deeplearning/dl-op-matmul-transpose-b-benchmark.log @@ -1,4 +1,4 @@ -2025-05-26T21:13:28+00:00 +2025-06-01T09:44:10+00:00 Running ./dl-op-matmul-transpose-b-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,15 +6,15 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.07, 1.18, 1.91 +Load Average: 1.01, 1.78, 3.82 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
----------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations ----------------------------------------------------------------------------------------------- -DL_OPS_MATMUL_TRANSPOSE_B/scalar_O0/iterations:5 1265 ms 1264 ms 5 -DL_OPS_MATMUL_TRANSPOSE_B/scalar_O3/iterations:5 327 ms 327 ms 5 -DL_OPS_MATMUL_TRANSPOSE_B/scalar_O3_omp/iterations:5 30.3 ms 18.8 ms 5 -DL_OPS_MATMUL_TRANSPOSE_B/vec/iterations:5 83.6 ms 83.6 ms 5 +DL_OPS_MATMUL_TRANSPOSE_B/scalar_O0/iterations:5 1045 ms 1044 ms 5 +DL_OPS_MATMUL_TRANSPOSE_B/scalar_O3/iterations:5 277 ms 277 ms 5 +DL_OPS_MATMUL_TRANSPOSE_B/scalar_O3_omp/iterations:5 30.3 ms 19.4 ms 5 +DL_OPS_MATMUL_TRANSPOSE_B/vec/iterations:5 84.9 ms 84.9 ms 5 ---------- Verification ---------- scalar_O3 PASS scalar_O3_omp PASS diff --git a/test_result/deeplearning/dl-op-tosa-transpose-benchmark.json b/test_result/deeplearning/dl-op-tosa-transpose-benchmark.json old mode 100755 new mode 100644 index 529b1a14..95272137 --- a/test_result/deeplearning/dl-op-tosa-transpose-benchmark.json +++ b/test_result/deeplearning/dl-op-tosa-transpose-benchmark.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-05-26T21:13:27+00:00", + "date": "2025-06-01T09:44:09+00:00", "host_name": "4ed4bacfe45d", "executable": "./dl-op-tosa-transpose-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.07324,1.17773,1.90625], + "load_avg": [1.00684,1.78223,3.82031], "library_build_type": "release" }, "benchmarks": [ @@ -46,8 +46,8 @@ "repetition_index": 0, "threads": 1, "iterations": 5, - "real_time": 2.9716657474637032e+01, - "cpu_time": 2.0871131400000003e+01, + "real_time": 2.4949089437723160e+01, + "cpu_time": 1.7676270800000001e+01, "time_unit": "ms" }, { @@ -60,8 +60,8 @@ "repetition_index": 0, "threads": 1, "iterations": 5, - "real_time": 2.1914588660001755e+01, - "cpu_time": 2.0427011800000006e+01, + "real_time": 1.7852439358830452e+01, + "cpu_time": 
1.5263622600000005e+01, "time_unit": "ms" } ] diff --git a/test_result/deeplearning/dl-op-tosa-transpose-benchmark.log b/test_result/deeplearning/dl-op-tosa-transpose-benchmark.log old mode 100755 new mode 100644 index 78e840aa..550769b3 --- a/test_result/deeplearning/dl-op-tosa-transpose-benchmark.log +++ b/test_result/deeplearning/dl-op-tosa-transpose-benchmark.log @@ -1,4 +1,4 @@ -2025-05-26T21:13:27+00:00 +2025-06-01T09:44:09+00:00 Running ./dl-op-tosa-transpose-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,12 +6,12 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.07, 1.18, 1.91 +Load Average: 1.01, 1.78, 3.82 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. ------------------------------------------------------------------------------------- Benchmark Time CPU Iterations ------------------------------------------------------------------------------------- -DL_OPS_TRANSPOSE_2D/scalar_O0/iterations:5 29.7 ms 20.9 ms 5 -DL_OPS_TRANSPOSE_2D/scalar_O3/iterations:5 21.9 ms 20.4 ms 5 +DL_OPS_TRANSPOSE_2D/scalar_O0/iterations:5 24.9 ms 17.7 ms 5 +DL_OPS_TRANSPOSE_2D/scalar_O3/iterations:5 17.9 ms 15.3 ms 5 ---------- Verification ---------- scalar_O3 PASS diff --git a/test_result/deeplearning/run_results_summary.log b/test_result/deeplearning/run_results_summary.log old mode 100755 new mode 100644 diff --git a/test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json old mode 100755 new mode 100644 index 0daff6be..a6898082 --- a/test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json +++ b/test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json @@ -1,6 +1,6 @@ { "context": { - 
"date": "2025-05-26T22:23:17+00:00", + "date": "2025-06-01T10:09:28+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [2.00293,4.94727,7.33203], + "load_avg": [1.00244,1.12988,1.85352], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 144, - "real_time": 4.8616694079505072e+00, - "cpu_time": 4.8615443888888876e+00, + "iterations": 143, + "real_time": 4.8773541030558674e+00, + "cpu_time": 4.8772445034965024e+00, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 95, - "real_time": 7.3721156308525488e+00, - "cpu_time": 7.3717356526315809e+00, + "iterations": 97, + "real_time": 7.1875292462171965e+00, + "cpu_time": 7.1872436701030908e+00, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1663, - "real_time": 4.2029142590330371e-01, - "cpu_time": 4.2028052796151533e-01, + "iterations": 1675, + "real_time": 4.1845967520528765e-01, + "cpu_time": 4.1844541313432831e-01, "time_unit": "ms" }, { @@ -87,9 +87,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 651, - "real_time": 1.0759140208889995e+00, - "cpu_time": 1.0758246789554526e+00, + "iterations": 666, + "real_time": 1.0563786541511704e+00, + "cpu_time": 1.0563535375375375e+00, "time_unit": "ms" }, { @@ -101,9 +101,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 367, - "real_time": 1.9042591896791223e+00, - "cpu_time": 1.9041629400544959e+00, + "iterations": 376, + "real_time": 1.8610506060909717e+00, + "cpu_time": 1.8609820930851073e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4748, - "real_time": 1.4552543145347865e-01, - "cpu_time": 1.4551487215669751e-01, + "iterations": 
4817, + "real_time": 1.4254830093992085e-01, + "cpu_time": 1.4254228129541210e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2632, - "real_time": 2.6613064492432725e-01, - "cpu_time": 2.6612582446808503e-01, + "iterations": 2689, + "real_time": 2.5988410633263775e-01, + "cpu_time": 2.5987645704722923e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 102984, - "real_time": 6.7871305550391663e-03, - "cpu_time": 6.7868588518604838e-03, + "iterations": 105253, + "real_time": 6.6504519812804995e-03, + "cpu_time": 6.6503598092215932e-03, "time_unit": "ms" }, { @@ -157,9 +157,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 48927, - "real_time": 1.4295738095923072e-02, - "cpu_time": 1.4295068591984003e-02, + "iterations": 49956, + "real_time": 1.4011868008661712e-02, + "cpu_time": 1.4011432080230613e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3155, - "real_time": 2.2388931287071784e-01, - "cpu_time": 2.2387921521394577e-01, + "iterations": 3274, + "real_time": 2.1450668653028362e-01, + "cpu_time": 2.1450349694563184e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3149, - "real_time": 2.2184868068420990e-01, - "cpu_time": 2.2184349539536333e-01, + "iterations": 3255, + "real_time": 2.1387947674628960e-01, + "cpu_time": 2.1387067096774173e-01, "time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2068, - "real_time": 3.5998106139713837e-01, - "cpu_time": 3.5995235735009629e-01, + "iterations": 2261, + "real_time": 3.1398964921413719e-01, + "cpu_time": 3.1398281954887258e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1957, - "real_time": 
3.5581509042559806e-01, - "cpu_time": 3.5579569596320909e-01, + "iterations": 2259, + "real_time": 3.1822234319606257e-01, + "cpu_time": 3.1820692784417859e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 731, - "real_time": 9.3642933097975034e-01, - "cpu_time": 9.3637127633378980e-01, + "iterations": 835, + "real_time": 8.0970415767438397e-01, + "cpu_time": 8.0966204670658792e-01, "time_unit": "ms" }, { @@ -241,9 +241,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 729, - "real_time": 9.1944250121038151e-01, - "cpu_time": 9.1939183127572155e-01, + "iterations": 848, + "real_time": 7.8669123193424828e-01, + "cpu_time": 7.8665746108490497e-01, "time_unit": "ms" }, { @@ -255,9 +255,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5005, - "real_time": 1.3918872033203994e-01, - "cpu_time": 1.3918274625374638e-01, + "iterations": 5119, + "real_time": 1.3659681348356797e-01, + "cpu_time": 1.3659497030670081e-01, "time_unit": "ms" }, { @@ -269,9 +269,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3076, - "real_time": 2.2821756435851281e-01, - "cpu_time": 2.2820870481144373e-01, + "iterations": 3092, + "real_time": 2.2627924897865831e-01, + "cpu_time": 2.2626982018111277e-01, "time_unit": "ms" }, { @@ -283,9 +283,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3193, - "real_time": 2.2276521902770560e-01, - "cpu_time": 2.2275964672721563e-01, + "iterations": 3082, + "real_time": 2.2718680044666348e-01, + "cpu_time": 2.2718060447761135e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2760, - "real_time": 2.5519946441594243e-01, - "cpu_time": 2.5519398152173933e-01, + "iterations": 2689, + "real_time": 2.6025091304505082e-01, + "cpu_time": 2.6024617776124936e-01, "time_unit": "ms" }, { @@ -311,9 +311,9 @@ "repetitions": 1, 
"repetition_index": 0, "threads": 1, - "iterations": 2736, - "real_time": 2.5641642666111392e-01, - "cpu_time": 2.5640509210526247e-01, + "iterations": 2705, + "real_time": 2.5885344212685407e-01, + "cpu_time": 2.5885116820702392e-01, "time_unit": "ms" }, { @@ -325,9 +325,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2716, - "real_time": 2.5645334311981793e-01, - "cpu_time": 2.5644544771723121e-01, + "iterations": 2779, + "real_time": 2.5195326185582478e-01, + "cpu_time": 2.5194584382871649e-01, "time_unit": "ms" }, { @@ -339,9 +339,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4963, - "real_time": 1.4073776507927674e-01, - "cpu_time": 1.4073016078984532e-01, + "iterations": 5113, + "real_time": 1.3684895659714744e-01, + "cpu_time": 1.3684487287306843e-01, "time_unit": "ms" } ] diff --git a/test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log b/test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log new file mode 100644 index 00000000..22773f00 --- /dev/null +++ b/test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log @@ -0,0 +1,56 @@ +2025-06-01T10:09:28+00:00 +Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 1.00, 1.13, 1.85 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
+-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 4.88 ms 4.88 ms 143 +MLIR_Conv2D/1 7.19 ms 7.19 ms 97 +Buddy_Conv2D/1 0.418 ms 0.418 ms 1675 +Buddy_Corr2D_Constant_Padding/1 1.06 ms 1.06 ms 666 +OpenCV_Filter2D_Constant_Padding/1 1.86 ms 1.86 ms 376 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.143 ms 0.143 ms 4817 +Buddy_Resize2D_Bilinear_Interpolation/1 0.260 ms 0.260 ms 2689 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 105253 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 49956 +Buddy_Erosion2D_Constant_Padding/1 0.215 ms 0.215 ms 3274 +Buddy_Dilation2D_Constant_Padding/1 0.214 ms 0.214 ms 3255 +Buddy_Opening2D_Constant_Padding/1 0.314 ms 0.314 ms 2261 +Buddy_Closing2D_Constant_Padding/1 0.318 ms 0.318 ms 2259 +Buddy_TopHat2D_Constant_Padding/1 0.810 ms 0.810 ms 835 +Buddy_BottomHat2D_Constant_Padding/1 0.787 ms 0.787 ms 848 +OpenCV_Erode2D_Constant_Padding/1 0.137 ms 0.137 ms 5119 +OpenCV_Opening2D_Constant_Padding/1 0.226 ms 0.226 ms 3092 +OpenCV_Closing2D_Constant_Padding/1 0.227 ms 0.227 ms 3082 +OpenCV_TopHat2D_Constant_Padding/1 0.260 ms 0.260 ms 2689 +OpenCV_BottomHat2D_Constant_Padding/1 0.259 ms 0.259 ms 2705 +OpenCV_MorphGrad2D_Constant_Padding/1 0.252 ms 0.252 ms 2779 +OpenCV_Dilate2D_Constant_Padding/1 0.137 ms 0.137 ms 5113 +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. 
diff --git a/test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json new file mode 100644 index 00000000..f6dd1e3f --- /dev/null +++ b/test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json @@ -0,0 +1,348 @@ +{ + "context": { + "date": "2025-06-01T10:09:52+00:00", + "host_name": "4ed4bacfe45d", + "executable": "./bin/image-processing-benchmark", + "num_cpus": 24, + "mhz_per_cpu": 5100, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 31457280, + "num_sharing": 24 + } + ], + "load_avg": [1.00049,1.12012,1.83398], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "Eigen_Convolve2D/1", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Eigen_Convolve2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 143, + "real_time": 4.8817551037023117e+00, + "cpu_time": 4.8816076643356636e+00, + "time_unit": "ms" + }, + { + "name": "MLIR_Conv2D/1", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "MLIR_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 97, + "real_time": 7.1810987806811779e+00, + "cpu_time": 7.1807136907216469e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Conv2D/1", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "Buddy_Conv2D/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1675, + "real_time": 
4.1780948194105233e-01, + "cpu_time": 4.1780305313432836e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Corr2D_Constant_Padding/1", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "Buddy_Corr2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 662, + "real_time": 1.0580040790208156e+00, + "cpu_time": 1.0579419244712991e+00, + "time_unit": "ms" + }, + { + "name": "OpenCV_Filter2D_Constant_Padding/1", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "OpenCV_Filter2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 376, + "real_time": 1.8630626046673415e+00, + "cpu_time": 1.8629938537234036e+00, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 5, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4857, + "real_time": 1.4277792460748584e-01, + "cpu_time": 1.4277163187152564e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "Buddy_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2693, + "real_time": 2.6049529473068012e-01, + "cpu_time": 2.6048748124767929e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "family_index": 7, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 105155, + "real_time": 6.6589808706085229e-03, + "cpu_time": 6.6587386905044982e-03, + "time_unit": "ms" 
+ }, + { + "name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "family_index": 8, + "per_family_instance_index": 0, + "run_name": "OpenCV_Resize2D_Bilinear_Interpolation/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 49833, + "real_time": 1.4114310853593676e-02, + "cpu_time": 1.4113868821865060e-02, + "time_unit": "ms" + }, + { + "name": "Buddy_Erosion2D_Constant_Padding/1", + "family_index": 9, + "per_family_instance_index": 0, + "run_name": "Buddy_Erosion2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3267, + "real_time": 2.1469332323385956e-01, + "cpu_time": 2.1467965840220340e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Dilation2D_Constant_Padding/1", + "family_index": 10, + "per_family_instance_index": 0, + "run_name": "Buddy_Dilation2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3262, + "real_time": 2.1372071583255678e-01, + "cpu_time": 2.1371441998773794e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Opening2D_Constant_Padding/1", + "family_index": 11, + "per_family_instance_index": 0, + "run_name": "Buddy_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2259, + "real_time": 3.0885873203284975e-01, + "cpu_time": 3.0883940903054496e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_Closing2D_Constant_Padding/1", + "family_index": 12, + "per_family_instance_index": 0, + "run_name": "Buddy_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2232, + "real_time": 3.1060778776243808e-01, + "cpu_time": 3.1059406586021510e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_TopHat2D_Constant_Padding/1", + "family_index": 13, + "per_family_instance_index": 0, + "run_name": 
"Buddy_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 854, + "real_time": 8.0134714157505949e-01, + "cpu_time": 8.0130978220140392e-01, + "time_unit": "ms" + }, + { + "name": "Buddy_BottomHat2D_Constant_Padding/1", + "family_index": 14, + "per_family_instance_index": 0, + "run_name": "Buddy_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 833, + "real_time": 7.9537247892330531e-01, + "cpu_time": 7.9534494117647103e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Erode2D_Constant_Padding/1", + "family_index": 15, + "per_family_instance_index": 0, + "run_name": "OpenCV_Erode2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 5118, + "real_time": 1.3649488219517478e-01, + "cpu_time": 1.3649151563110587e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Opening2D_Constant_Padding/1", + "family_index": 16, + "per_family_instance_index": 0, + "run_name": "OpenCV_Opening2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3117, + "real_time": 2.2415243349742309e-01, + "cpu_time": 2.2414577446262374e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Closing2D_Constant_Padding/1", + "family_index": 17, + "per_family_instance_index": 0, + "run_name": "OpenCV_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3087, + "real_time": 2.2616963212085381e-01, + "cpu_time": 2.2616179786200155e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_TopHat2D_Constant_Padding/1", + "family_index": 18, + "per_family_instance_index": 0, + "run_name": "OpenCV_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + 
"iterations": 2731, + "real_time": 2.5616902332849795e-01, + "cpu_time": 2.5615968912486209e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_BottomHat2D_Constant_Padding/1", + "family_index": 19, + "per_family_instance_index": 0, + "run_name": "OpenCV_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2693, + "real_time": 2.5978066425561641e-01, + "cpu_time": 2.5977364017823990e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "family_index": 20, + "per_family_instance_index": 0, + "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2799, + "real_time": 2.4936800784683943e-01, + "cpu_time": 2.4935978027867164e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Dilate2D_Constant_Padding/1", + "family_index": 21, + "per_family_instance_index": 0, + "run_name": "OpenCV_Dilate2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 5051, + "real_time": 1.3865814552557773e-01, + "cpu_time": 1.3865657156998629e-01, + "time_unit": "ms" + } + ] +} diff --git a/test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log b/test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log new file mode 100644 index 00000000..02471cda --- /dev/null +++ b/test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log @@ -0,0 +1,56 @@ +2025-06-01T10:09:52+00:00 +Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 1.00, 1.12, 1.83 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements 
may be noisy and will incur extra overhead. +-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 4.88 ms 4.88 ms 143 +MLIR_Conv2D/1 7.18 ms 7.18 ms 97 +Buddy_Conv2D/1 0.418 ms 0.418 ms 1675 +Buddy_Corr2D_Constant_Padding/1 1.06 ms 1.06 ms 662 +OpenCV_Filter2D_Constant_Padding/1 1.86 ms 1.86 ms 376 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.143 ms 0.143 ms 4857 +Buddy_Resize2D_Bilinear_Interpolation/1 0.260 ms 0.260 ms 2693 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 105155 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 49833 +Buddy_Erosion2D_Constant_Padding/1 0.215 ms 0.215 ms 3267 +Buddy_Dilation2D_Constant_Padding/1 0.214 ms 0.214 ms 3262 +Buddy_Opening2D_Constant_Padding/1 0.309 ms 0.309 ms 2259 +Buddy_Closing2D_Constant_Padding/1 0.311 ms 0.311 ms 2232 +Buddy_TopHat2D_Constant_Padding/1 0.801 ms 0.801 ms 854 +Buddy_BottomHat2D_Constant_Padding/1 0.795 ms 0.795 ms 833 +OpenCV_Erode2D_Constant_Padding/1 0.136 ms 0.136 ms 5118 +OpenCV_Opening2D_Constant_Padding/1 0.224 ms 0.224 ms 3117 +OpenCV_Closing2D_Constant_Padding/1 0.226 ms 0.226 ms 3087 +OpenCV_TopHat2D_Constant_Padding/1 0.256 ms 0.256 ms 2731 +OpenCV_BottomHat2D_Constant_Padding/1 0.260 ms 0.260 ms 2693 +OpenCV_MorphGrad2D_Constant_Padding/1 0.249 ms 0.249 ms 2799 +OpenCV_Dilate2D_Constant_Padding/1 0.139 ms 0.139 ms 5051 +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. 
diff --git a/test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json old mode 100755 new mode 100644 index a4651f0b..2d7c7421 --- a/test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json +++ b/test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-05-26T22:23:45+00:00", + "date": "2025-06-01T10:10:17+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [2,4.66406,7.16064], + "load_avg": [1,1.10938,1.81006], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 59, - "real_time": 1.1827939521458189e+01, - "cpu_time": 1.1827101322033899e+01, + "iterations": 61, + "real_time": 1.1535078989433460e+01, + "cpu_time": 1.1535000672131149e+01, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 24, - "real_time": 2.9902152329062421e+01, - "cpu_time": 2.9900978541666660e+01, + "iterations": 25, + "real_time": 2.9024361819028854e+01, + "cpu_time": 2.9023777840000001e+01, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 751, - "real_time": 1.0176254473577009e+00, - "cpu_time": 1.0176059733688418e+00, + "iterations": 632, + "real_time": 1.1113395242468467e+00, + "cpu_time": 1.1113083670886079e+00, "time_unit": "ms" }, { @@ -87,9 +87,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 391, - "real_time": 1.7920395364160733e+00, - "cpu_time": 1.7919525166240420e+00, + "iterations": 400, + "real_time": 1.7448093881830573e+00, + "cpu_time": 1.7447838250000003e+00, "time_unit": 
"ms" }, { @@ -101,9 +101,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 255, - "real_time": 2.7389568660189125e+00, - "cpu_time": 2.7388458392156876e+00, + "iterations": 262, + "real_time": 2.6770318211376214e+00, + "cpu_time": 2.6768247938931302e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4778, - "real_time": 1.4560087359802351e-01, - "cpu_time": 1.4559084554206772e-01, + "iterations": 4855, + "real_time": 1.4248677518844113e-01, + "cpu_time": 1.4247969289392393e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2630, - "real_time": 2.6603678361771227e-01, - "cpu_time": 2.6602515285171091e-01, + "iterations": 2692, + "real_time": 2.6068802961711546e-01, + "cpu_time": 2.6068349999999985e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 103033, - "real_time": 6.7959410668000245e-03, - "cpu_time": 6.7955963623305181e-03, + "iterations": 105416, + "real_time": 6.6310110770308973e-03, + "cpu_time": 6.6309139883888547e-03, "time_unit": "ms" }, { @@ -157,9 +157,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 48860, - "real_time": 1.6051803029710720e-02, - "cpu_time": 1.6050967110110514e-02, + "iterations": 49870, + "real_time": 1.4010478047897181e-02, + "cpu_time": 1.4010075977541595e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3134, - "real_time": 2.2470747996133530e-01, - "cpu_time": 2.2469278525845587e-01, + "iterations": 3258, + "real_time": 2.1404682151502155e-01, + "cpu_time": 2.1404371424186619e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2971, - "real_time": 2.2581702669394693e-01, - "cpu_time": 2.2580936755301245e-01, + "iterations": 3246, + 
"real_time": 2.1395063944716403e-01, + "cpu_time": 2.1394007516943925e-01, "time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1881, - "real_time": 3.4690140719302215e-01, - "cpu_time": 3.4687723391812830e-01, + "iterations": 2260, + "real_time": 3.2007832047158635e-01, + "cpu_time": 3.2006619601769948e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2009, - "real_time": 3.4828037635463577e-01, - "cpu_time": 3.4826756097560951e-01, + "iterations": 2223, + "real_time": 3.0985082283086512e-01, + "cpu_time": 3.0983889653621222e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 714, - "real_time": 9.2675730179087457e-01, - "cpu_time": 9.2670533893557483e-01, + "iterations": 827, + "real_time": 8.0566855987585728e-01, + "cpu_time": 8.0563922007255140e-01, "time_unit": "ms" }, { @@ -241,9 +241,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 723, - "real_time": 9.3064499171360560e-01, - "cpu_time": 9.3059483955739986e-01, + "iterations": 852, + "real_time": 8.2004826489678573e-01, + "cpu_time": 8.2000983802816896e-01, "time_unit": "ms" }, { @@ -255,9 +255,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4965, - "real_time": 1.4058820535649225e-01, - "cpu_time": 1.4057882396777469e-01, + "iterations": 5096, + "real_time": 1.3718380646859082e-01, + "cpu_time": 1.3718256004709559e-01, "time_unit": "ms" }, { @@ -269,9 +269,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3088, - "real_time": 2.2168861282265556e-01, - "cpu_time": 2.2166768620466271e-01, + "iterations": 3136, + "real_time": 2.2296642240764078e-01, + "cpu_time": 2.2295898022959179e-01, "time_unit": "ms" }, { @@ -283,9 +283,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3129, - "real_time": 
2.2427948622152652e-01, - "cpu_time": 2.2426256631511651e-01, + "iterations": 3085, + "real_time": 2.2682713365825299e-01, + "cpu_time": 2.2682429335494333e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2625, - "real_time": 2.6387358208497363e-01, - "cpu_time": 2.6386711885714254e-01, + "iterations": 2693, + "real_time": 2.5972659152168526e-01, + "cpu_time": 2.5971731006312543e-01, "time_unit": "ms" }, { @@ -311,9 +311,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2625, - "real_time": 2.6865486658754800e-01, - "cpu_time": 2.6863586247619137e-01, + "iterations": 2686, + "real_time": 2.6035926575902141e-01, + "cpu_time": 2.6035738272524256e-01, "time_unit": "ms" }, { @@ -325,9 +325,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2692, - "real_time": 2.5986144485237161e-01, - "cpu_time": 2.5984780089153114e-01, + "iterations": 2746, + "real_time": 2.5441637364448810e-01, + "cpu_time": 2.5440569264384544e-01, "time_unit": "ms" }, { @@ -339,9 +339,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5053, - "real_time": 1.3817052432281909e-01, - "cpu_time": 1.3816524757569701e-01, + "iterations": 5208, + "real_time": 1.3434360577752055e-01, + "cpu_time": 1.3434074481566843e-01, "time_unit": "ms" } ] diff --git a/test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log b/test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log new file mode 100644 index 00000000..1d883f92 --- /dev/null +++ b/test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log @@ -0,0 +1,56 @@ +2025-06-01T10:10:17+00:00 +Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 
1.00, 1.11, 1.81 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 11.5 ms 11.5 ms 61 +MLIR_Conv2D/1 29.0 ms 29.0 ms 25 +Buddy_Conv2D/1 1.11 ms 1.11 ms 632 +Buddy_Corr2D_Constant_Padding/1 1.74 ms 1.74 ms 400 +OpenCV_Filter2D_Constant_Padding/1 2.68 ms 2.68 ms 262 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.142 ms 0.142 ms 4855 +Buddy_Resize2D_Bilinear_Interpolation/1 0.261 ms 0.261 ms 2692 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 105416 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 49870 +Buddy_Erosion2D_Constant_Padding/1 0.214 ms 0.214 ms 3258 +Buddy_Dilation2D_Constant_Padding/1 0.214 ms 0.214 ms 3246 +Buddy_Opening2D_Constant_Padding/1 0.320 ms 0.320 ms 2260 +Buddy_Closing2D_Constant_Padding/1 0.310 ms 0.310 ms 2223 +Buddy_TopHat2D_Constant_Padding/1 0.806 ms 0.806 ms 827 +Buddy_BottomHat2D_Constant_Padding/1 0.820 ms 0.820 ms 852 +OpenCV_Erode2D_Constant_Padding/1 0.137 ms 0.137 ms 5096 +OpenCV_Opening2D_Constant_Padding/1 0.223 ms 0.223 ms 3136 +OpenCV_Closing2D_Constant_Padding/1 0.227 ms 0.227 ms 3085 +OpenCV_TopHat2D_Constant_Padding/1 0.260 ms 0.260 ms 2693 +OpenCV_BottomHat2D_Constant_Padding/1 0.260 ms 0.260 ms 2686 +OpenCV_MorphGrad2D_Constant_Padding/1 0.254 ms 0.254 ms 2746 +OpenCV_Dilate2D_Constant_Padding/1 0.134 ms 0.134 ms 5208 +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. 
diff --git a/test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json old mode 100755 new mode 100644 index 3b942a13..c408eb70 --- a/test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json +++ b/test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-05-26T22:24:09+00:00", + "date": "2025-06-01T10:10:41+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [2.06152,4.46436,7.02783], + "load_avg": [1,1.09961,1.78809], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 60, - "real_time": 1.1859718554963669e+01, - "cpu_time": 1.1859415466666666e+01, + "iterations": 61, + "real_time": 1.1567577933434579e+01, + "cpu_time": 1.1567413278688525e+01, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 24, - "real_time": 2.9875528533011675e+01, - "cpu_time": 2.9873963000000003e+01, + "iterations": 25, + "real_time": 2.9013997018337250e+01, + "cpu_time": 2.9013626159999994e+01, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 738, - "real_time": 1.0141539980523631e+00, - "cpu_time": 1.0141161287262870e+00, + "iterations": 685, + "real_time": 1.0203661655422545e+00, + "cpu_time": 1.0203240744525550e+00, "time_unit": "ms" }, { @@ -87,9 +87,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 393, - "real_time": 1.7817526648138620e+00, - "cpu_time": 1.7816172111959294e+00, + "iterations": 400, + "real_time": 1.7526307236403227e+00, + "cpu_time": 1.7526052500000000e+00, 
"time_unit": "ms" }, { @@ -101,9 +101,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 255, - "real_time": 2.7528921763102212e+00, - "cpu_time": 2.7528130431372522e+00, + "iterations": 261, + "real_time": 2.6768200827398521e+00, + "cpu_time": 2.6767474022988518e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4756, - "real_time": 1.4574629629831337e-01, - "cpu_time": 1.4573879415475194e-01, + "iterations": 4858, + "real_time": 1.4230732592463052e-01, + "cpu_time": 1.4230317393989289e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2630, - "real_time": 2.6615853828622360e-01, - "cpu_time": 2.6614812585551312e-01, + "iterations": 2692, + "real_time": 2.6008169433824374e-01, + "cpu_time": 2.6007254606240704e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 103027, - "real_time": 6.7919607560599695e-03, - "cpu_time": 6.7916588564162781e-03, + "iterations": 105372, + "real_time": 6.6475084024833139e-03, + "cpu_time": 6.6474395854686256e-03, "time_unit": "ms" }, { @@ -157,9 +157,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 48458, - "real_time": 1.4418768695910375e-02, - "cpu_time": 1.4418251145321751e-02, + "iterations": 49847, + "real_time": 1.4039105948245419e-02, + "cpu_time": 1.4038696832306864e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3152, - "real_time": 2.2244602997271998e-01, - "cpu_time": 2.2243656757614191e-01, + "iterations": 3249, + "real_time": 2.1369310503521124e-01, + "cpu_time": 2.1368993444136664e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3142, - "real_time": 2.2203278716565847e-01, - "cpu_time": 2.2202909579885419e-01, + "iterations": 3265, 
+ "real_time": 2.1320457811286220e-01, + "cpu_time": 2.1319636539050579e-01, "time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2069, - "real_time": 3.3419092044904070e-01, - "cpu_time": 3.3418129096181753e-01, + "iterations": 2214, + "real_time": 3.1372998835217769e-01, + "cpu_time": 3.1372075700090379e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2004, - "real_time": 3.3194217050146912e-01, - "cpu_time": 3.3190982884231574e-01, + "iterations": 2229, + "real_time": 3.0766479326717089e-01, + "cpu_time": 3.0765202960969057e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 753, - "real_time": 9.1564339421105734e-01, - "cpu_time": 9.1559011819389058e-01, + "iterations": 828, + "real_time": 7.8975142020246258e-01, + "cpu_time": 7.8972666304347949e-01, "time_unit": "ms" }, { @@ -241,9 +241,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 762, - "real_time": 8.7081289696177155e-01, - "cpu_time": 8.7080586220472411e-01, + "iterations": 854, + "real_time": 7.7664592811528077e-01, + "cpu_time": 7.7661059836065438e-01, "time_unit": "ms" }, { @@ -255,9 +255,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4961, - "real_time": 1.4088589137142596e-01, - "cpu_time": 1.4087717335214653e-01, + "iterations": 5075, + "real_time": 1.3745506518873676e-01, + "cpu_time": 1.3745407389162539e-01, "time_unit": "ms" }, { @@ -269,9 +269,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3152, - "real_time": 2.1964701266042169e-01, - "cpu_time": 2.1964015609137086e-01, + "iterations": 3111, + "real_time": 2.2506565262711115e-01, + "cpu_time": 2.2505757312761160e-01, "time_unit": "ms" }, { @@ -283,9 +283,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2470, - "real_time": 
2.2777517133878794e-01, - "cpu_time": 2.2776716882591155e-01, + "iterations": 3056, + "real_time": 2.2886360200677866e-01, + "cpu_time": 2.2885967277486849e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2673, - "real_time": 2.5705931714488200e-01, - "cpu_time": 2.5705270594837276e-01, + "iterations": 2672, + "real_time": 2.6211865430329734e-01, + "cpu_time": 2.6211163136227550e-01, "time_unit": "ms" }, { @@ -311,9 +311,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2749, - "real_time": 2.5811154434641043e-01, - "cpu_time": 2.5809966496907932e-01, + "iterations": 2653, + "real_time": 2.6406221305182687e-01, + "cpu_time": 2.6405938220882053e-01, "time_unit": "ms" }, { @@ -325,9 +325,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2717, - "real_time": 2.5727819030255045e-01, - "cpu_time": 2.5726741663599595e-01, + "iterations": 2750, + "real_time": 2.5381605868989771e-01, + "cpu_time": 2.5380610654545421e-01, "time_unit": "ms" }, { @@ -339,9 +339,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5082, - "real_time": 1.3795288426992885e-01, - "cpu_time": 1.3794680322707623e-01, + "iterations": 5201, + "real_time": 1.3454156696590133e-01, + "cpu_time": 1.3453595366275758e-01, "time_unit": "ms" } ] diff --git a/test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log b/test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log new file mode 100644 index 00000000..08741675 --- /dev/null +++ b/test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log @@ -0,0 +1,56 @@ +2025-06-01T10:10:41+00:00 +Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 
1.00, 1.10, 1.79 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 11.6 ms 11.6 ms 61 +MLIR_Conv2D/1 29.0 ms 29.0 ms 25 +Buddy_Conv2D/1 1.02 ms 1.02 ms 685 +Buddy_Corr2D_Constant_Padding/1 1.75 ms 1.75 ms 400 +OpenCV_Filter2D_Constant_Padding/1 2.68 ms 2.68 ms 261 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.142 ms 0.142 ms 4858 +Buddy_Resize2D_Bilinear_Interpolation/1 0.260 ms 0.260 ms 2692 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 105372 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 49847 +Buddy_Erosion2D_Constant_Padding/1 0.214 ms 0.214 ms 3249 +Buddy_Dilation2D_Constant_Padding/1 0.213 ms 0.213 ms 3265 +Buddy_Opening2D_Constant_Padding/1 0.314 ms 0.314 ms 2214 +Buddy_Closing2D_Constant_Padding/1 0.308 ms 0.308 ms 2229 +Buddy_TopHat2D_Constant_Padding/1 0.790 ms 0.790 ms 828 +Buddy_BottomHat2D_Constant_Padding/1 0.777 ms 0.777 ms 854 +OpenCV_Erode2D_Constant_Padding/1 0.137 ms 0.137 ms 5075 +OpenCV_Opening2D_Constant_Padding/1 0.225 ms 0.225 ms 3111 +OpenCV_Closing2D_Constant_Padding/1 0.229 ms 0.229 ms 3056 +OpenCV_TopHat2D_Constant_Padding/1 0.262 ms 0.262 ms 2672 +OpenCV_BottomHat2D_Constant_Padding/1 0.264 ms 0.264 ms 2653 +OpenCV_MorphGrad2D_Constant_Padding/1 0.254 ms 0.254 ms 2750 +OpenCV_Dilate2D_Constant_Padding/1 0.135 ms 0.135 ms 5201 +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. 
diff --git a/test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json old mode 100755 new mode 100644 index 8caa890c..5f3d8f41 --- a/test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json +++ b/test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-05-26T22:19:15+00:00", + "date": "2025-06-01T10:05:28+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [2.1123,8.58008,8.91211], + "load_avg": [1.18652,1.28955,2.11279], "library_build_type": "release" }, "benchmarks": [ @@ -46,8 +46,8 @@ "repetition_index": 0, "threads": 1, "iterations": 144, - "real_time": 4.8711563124217925e+00, - "cpu_time": 4.8709695138888893e+00, + "real_time": 4.8743747304090199e+00, + "cpu_time": 4.8743142152777779e+00, "time_unit": "ms" }, { @@ -60,8 +60,8 @@ "repetition_index": 0, "threads": 1, "iterations": 95, - "real_time": 7.3745613623606534e+00, - "cpu_time": 7.3740730210526300e+00, + "real_time": 7.3758271963972790e+00, + "cpu_time": 7.3755260105263138e+00, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2234, - "real_time": 3.1500383327742615e-01, - "cpu_time": 3.1498164324082373e-01, + "iterations": 2241, + "real_time": 3.1217087819899686e-01, + "cpu_time": 3.1216030700580105e-01, "time_unit": "ms" }, { @@ -87,9 +87,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 850, - "real_time": 8.2385407651171960e-01, - "cpu_time": 8.2383643647058880e-01, + "iterations": 849, + "real_time": 8.2071478949208143e-01, + "cpu_time": 8.2067061248527651e-01, "time_unit": "ms" }, { @@ -101,9 +101,9 @@ "repetitions": 1, 
"repetition_index": 0, "threads": 1, - "iterations": 546, - "real_time": 1.2790120646848784e+00, - "cpu_time": 1.2789613168498175e+00, + "iterations": 547, + "real_time": 1.2778125710145212e+00, + "cpu_time": 1.2777362102376604e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4747, - "real_time": 1.4571417911770421e-01, - "cpu_time": 1.4570690183273646e-01, + "iterations": 4774, + "real_time": 1.4609858747938462e-01, + "cpu_time": 1.4609281943862598e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2629, - "real_time": 2.6612855584710304e-01, - "cpu_time": 2.6610386801065034e-01, + "iterations": 2627, + "real_time": 2.6652880571711685e-01, + "cpu_time": 2.6652126570232204e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 102542, - "real_time": 6.8208885203973361e-03, - "cpu_time": 6.8207057205827868e-03, + "iterations": 103069, + "real_time": 6.7016581119360014e-03, + "cpu_time": 6.7014803869252636e-03, "time_unit": "ms" }, { @@ -157,9 +157,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 48907, - "real_time": 1.4288110696726128e-02, - "cpu_time": 1.4287668370580897e-02, + "iterations": 49911, + "real_time": 1.4021569295595974e-02, + "cpu_time": 1.4021311073711216e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3132, - "real_time": 2.2415018558388011e-01, - "cpu_time": 2.2413483556832717e-01, + "iterations": 3259, + "real_time": 2.1410988367712180e-01, + "cpu_time": 2.1410759803620766e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3121, - "real_time": 2.2181368127670550e-01, - "cpu_time": 2.2180239250240327e-01, + "iterations": 3230, + "real_time": 2.1453265723521495e-01, + "cpu_time": 
2.1452785417956655e-01, "time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2059, - "real_time": 3.3811343206891509e-01, - "cpu_time": 3.3810039873725029e-01, + "iterations": 2255, + "real_time": 3.0947743921216470e-01, + "cpu_time": 3.0946956718403540e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2107, - "real_time": 3.4437413974061759e-01, - "cpu_time": 3.4436074560987162e-01, + "iterations": 2273, + "real_time": 3.0895578070616231e-01, + "cpu_time": 3.0893229960404744e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 725, - "real_time": 9.0386319263228054e-01, - "cpu_time": 9.0380061931034261e-01, + "iterations": 855, + "real_time": 7.7638528936090523e-01, + "cpu_time": 7.7633451111111051e-01, "time_unit": "ms" }, { @@ -241,9 +241,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 744, - "real_time": 9.1072037485578372e-01, - "cpu_time": 9.1067572983870981e-01, + "iterations": 856, + "real_time": 7.7369363290893145e-01, + "cpu_time": 7.7366387383177460e-01, "time_unit": "ms" }, { @@ -255,9 +255,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5040, - "real_time": 1.3834959014304102e-01, - "cpu_time": 1.3834366051587288e-01, + "iterations": 5148, + "real_time": 1.3578200977651667e-01, + "cpu_time": 1.3577912917637908e-01, "time_unit": "ms" }, { @@ -269,9 +269,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3222, - "real_time": 2.4203569839305569e-01, - "cpu_time": 2.4201785878336499e-01, + "iterations": 3185, + "real_time": 2.1949250724757297e-01, + "cpu_time": 2.1948565588697025e-01, "time_unit": "ms" }, { @@ -283,9 +283,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3151, - "real_time": 2.2163398455094246e-01, - "cpu_time": 2.2162477150111126e-01, + 
"iterations": 3143, + "real_time": 2.2253769026453465e-01, + "cpu_time": 2.2253109035952864e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2758, - "real_time": 2.5459998263622902e-01, - "cpu_time": 2.5458755329949295e-01, + "iterations": 2699, + "real_time": 2.5904624301577195e-01, + "cpu_time": 2.5903694368284486e-01, "time_unit": "ms" }, { @@ -311,9 +311,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2688, - "real_time": 2.5977311763978961e-01, - "cpu_time": 2.5976750632440504e-01, + "iterations": 2714, + "real_time": 2.5754975938761859e-01, + "cpu_time": 2.5754484340456885e-01, "time_unit": "ms" }, { @@ -325,9 +325,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2740, - "real_time": 2.5554979084073193e-01, - "cpu_time": 2.5554263649635012e-01, + "iterations": 2791, + "real_time": 2.5068771725421052e-01, + "cpu_time": 2.5067944965962069e-01, "time_unit": "ms" }, { @@ -339,9 +339,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4990, - "real_time": 1.4015850172432248e-01, - "cpu_time": 1.4015310941883707e-01, + "iterations": 5112, + "real_time": 1.3699782254387141e-01, + "cpu_time": 1.3699192468701091e-01, "time_unit": "ms" } ] diff --git a/test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log b/test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log new file mode 100644 index 00000000..f3a9e94d --- /dev/null +++ b/test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log @@ -0,0 +1,56 @@ +2025-06-01T10:05:28+00:00 +Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 1.19, 1.29, 2.11 +***WARNING*** CPU scaling is enabled, 
the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 4.87 ms 4.87 ms 144 +MLIR_Conv2D/1 7.38 ms 7.38 ms 95 +Buddy_Conv2D/1 0.312 ms 0.312 ms 2241 +Buddy_Corr2D_Constant_Padding/1 0.821 ms 0.821 ms 849 +OpenCV_Filter2D_Constant_Padding/1 1.28 ms 1.28 ms 547 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.146 ms 0.146 ms 4774 +Buddy_Resize2D_Bilinear_Interpolation/1 0.267 ms 0.267 ms 2627 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 103069 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 49911 +Buddy_Erosion2D_Constant_Padding/1 0.214 ms 0.214 ms 3259 +Buddy_Dilation2D_Constant_Padding/1 0.215 ms 0.215 ms 3230 +Buddy_Opening2D_Constant_Padding/1 0.309 ms 0.309 ms 2255 +Buddy_Closing2D_Constant_Padding/1 0.309 ms 0.309 ms 2273 +Buddy_TopHat2D_Constant_Padding/1 0.776 ms 0.776 ms 855 +Buddy_BottomHat2D_Constant_Padding/1 0.774 ms 0.774 ms 856 +OpenCV_Erode2D_Constant_Padding/1 0.136 ms 0.136 ms 5148 +OpenCV_Opening2D_Constant_Padding/1 0.219 ms 0.219 ms 3185 +OpenCV_Closing2D_Constant_Padding/1 0.223 ms 0.223 ms 3143 +OpenCV_TopHat2D_Constant_Padding/1 0.259 ms 0.259 ms 2699 +OpenCV_BottomHat2D_Constant_Padding/1 0.258 ms 0.258 ms 2714 +OpenCV_MorphGrad2D_Constant_Padding/1 0.251 ms 0.251 ms 2791 +OpenCV_Dilate2D_Constant_Padding/1 0.137 ms 0.137 ms 5112 +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. 
diff --git a/test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json old mode 100755 new mode 100644 index 9196ebc8..32a5db2d --- a/test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json +++ b/test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-05-26T22:19:39+00:00", + "date": "2025-06-01T10:05:52+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [2.07275,8.05029,8.72705], + "load_avg": [1.1958,1.28613,2.09277], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 140, - "real_time": 5.0083273622606486e+00, - "cpu_time": 5.0081867000000004e+00, + "iterations": 144, + "real_time": 4.8990769104825125e+00, + "cpu_time": 4.8990283611111103e+00, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 95, - "real_time": 7.3605304485873173e+00, - "cpu_time": 7.3600212842105277e+00, + "iterations": 97, + "real_time": 7.2054376960107964e+00, + "cpu_time": 7.2053482680412344e+00, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2233, - "real_time": 3.1330563174706644e-01, - "cpu_time": 3.1328374115539637e-01, + "iterations": 2257, + "real_time": 3.1050090547230164e-01, + "cpu_time": 3.1049665263624282e-01, "time_unit": "ms" }, { @@ -87,9 +87,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 856, - "real_time": 8.1252799778480400e-01, - "cpu_time": 8.1250713200934543e-01, + "iterations": 878, + "real_time": 7.9843645877202718e-01, + "cpu_time": 
7.9842614464692518e-01, "time_unit": "ms" }, { @@ -101,9 +101,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 547, - "real_time": 1.2758781748672290e+00, - "cpu_time": 1.2757829634369293e+00, + "iterations": 560, + "real_time": 1.2482626529942666e+00, + "cpu_time": 1.2482019642857143e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4751, - "real_time": 1.4585499139904498e-01, - "cpu_time": 1.4584467838349827e-01, + "iterations": 4872, + "real_time": 1.4268146057067246e-01, + "cpu_time": 1.4267865578817737e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2631, - "real_time": 2.6639430358558497e-01, - "cpu_time": 2.6637908627898133e-01, + "iterations": 2654, + "real_time": 2.6207054195295354e-01, + "cpu_time": 2.6205789299171084e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 103135, - "real_time": 6.7804008722305298e-03, - "cpu_time": 6.7801764095602830e-03, + "iterations": 105278, + "real_time": 6.6562565302057996e-03, + "cpu_time": 6.6561646592830431e-03, "time_unit": "ms" }, { @@ -157,9 +157,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 48881, - "real_time": 1.4308667188897338e-02, - "cpu_time": 1.4308491438391182e-02, + "iterations": 49913, + "real_time": 1.4036607057034684e-02, + "cpu_time": 1.4036086510528307e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3158, - "real_time": 2.2048866075473619e-01, - "cpu_time": 2.2047716117796104e-01, + "iterations": 3221, + "real_time": 2.1425787346753894e-01, + "cpu_time": 2.1425355448618436e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3109, - "real_time": 2.2516667965084228e-01, - "cpu_time": 
2.2515504277902859e-01, + "iterations": 3271, + "real_time": 2.1439506175484507e-01, + "cpu_time": 2.1438375817792729e-01, "time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2007, - "real_time": 3.4035718988202135e-01, - "cpu_time": 3.4034878375685157e-01, + "iterations": 2213, + "real_time": 3.1620805003203473e-01, + "cpu_time": 3.1619105106190648e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2062, - "real_time": 3.4142527042749898e-01, - "cpu_time": 3.4140258583899064e-01, + "iterations": 2230, + "real_time": 3.1144172203781356e-01, + "cpu_time": 3.1142306591928215e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 732, - "real_time": 9.0427116282243547e-01, - "cpu_time": 9.0420603961748647e-01, + "iterations": 866, + "real_time": 8.0038771876424342e-01, + "cpu_time": 8.0036796073903105e-01, "time_unit": "ms" }, { @@ -241,9 +241,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 731, - "real_time": 9.0717502631420310e-01, - "cpu_time": 9.0713226538987568e-01, + "iterations": 846, + "real_time": 7.9733535127789135e-01, + "cpu_time": 7.9729864420803698e-01, "time_unit": "ms" }, { @@ -255,9 +255,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5071, - "real_time": 1.3829381951705588e-01, - "cpu_time": 1.3829104515874571e-01, + "iterations": 5058, + "real_time": 1.3818436612685522e-01, + "cpu_time": 1.3818210241202053e-01, "time_unit": "ms" }, { @@ -269,9 +269,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3192, - "real_time": 2.1901250266024194e-01, - "cpu_time": 2.1900474968671726e-01, + "iterations": 3149, + "real_time": 2.2202210187930915e-01, + "cpu_time": 2.2201345633534444e-01, "time_unit": "ms" }, { @@ -283,9 +283,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - 
"iterations": 3221, - "real_time": 2.1656987814476164e-01, - "cpu_time": 2.1656167028872991e-01, + "iterations": 3169, + "real_time": 2.2069638393379679e-01, + "cpu_time": 2.2069076932786405e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2767, - "real_time": 2.5171531484818088e-01, - "cpu_time": 2.5171004878930253e-01, + "iterations": 2725, + "real_time": 2.5712088160558577e-01, + "cpu_time": 2.5710837284403609e-01, "time_unit": "ms" }, { @@ -311,9 +311,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2752, - "real_time": 2.5393080567334625e-01, - "cpu_time": 2.5391998219476775e-01, + "iterations": 2715, + "real_time": 2.5740468919167420e-01, + "cpu_time": 2.5739238526703495e-01, "time_unit": "ms" }, { @@ -325,9 +325,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2708, - "real_time": 2.5875106429133671e-01, - "cpu_time": 2.5874283825701655e-01, + "iterations": 2798, + "real_time": 2.5014725085707373e-01, + "cpu_time": 2.5013296140099989e-01, "time_unit": "ms" }, { @@ -339,9 +339,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4910, - "real_time": 1.4278720381485713e-01, - "cpu_time": 1.4278435274949061e-01, + "iterations": 5116, + "real_time": 1.3708527417645219e-01, + "cpu_time": 1.3708149824081331e-01, "time_unit": "ms" } ] diff --git a/test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log b/test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log new file mode 100644 index 00000000..875f2bd7 --- /dev/null +++ b/test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log @@ -0,0 +1,56 @@ +2025-06-01T10:05:52+00:00 +Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB 
(x12) + L3 Unified 30720 KiB (x1) +Load Average: 1.20, 1.29, 2.09 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 4.90 ms 4.90 ms 144 +MLIR_Conv2D/1 7.21 ms 7.21 ms 97 +Buddy_Conv2D/1 0.311 ms 0.310 ms 2257 +Buddy_Corr2D_Constant_Padding/1 0.798 ms 0.798 ms 878 +OpenCV_Filter2D_Constant_Padding/1 1.25 ms 1.25 ms 560 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.143 ms 0.143 ms 4872 +Buddy_Resize2D_Bilinear_Interpolation/1 0.262 ms 0.262 ms 2654 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 105278 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 49913 +Buddy_Erosion2D_Constant_Padding/1 0.214 ms 0.214 ms 3221 +Buddy_Dilation2D_Constant_Padding/1 0.214 ms 0.214 ms 3271 +Buddy_Opening2D_Constant_Padding/1 0.316 ms 0.316 ms 2213 +Buddy_Closing2D_Constant_Padding/1 0.311 ms 0.311 ms 2230 +Buddy_TopHat2D_Constant_Padding/1 0.800 ms 0.800 ms 866 +Buddy_BottomHat2D_Constant_Padding/1 0.797 ms 0.797 ms 846 +OpenCV_Erode2D_Constant_Padding/1 0.138 ms 0.138 ms 5058 +OpenCV_Opening2D_Constant_Padding/1 0.222 ms 0.222 ms 3149 +OpenCV_Closing2D_Constant_Padding/1 0.221 ms 0.221 ms 3169 +OpenCV_TopHat2D_Constant_Padding/1 0.257 ms 0.257 ms 2725 +OpenCV_BottomHat2D_Constant_Padding/1 0.257 ms 0.257 ms 2715 +OpenCV_MorphGrad2D_Constant_Padding/1 0.250 ms 0.250 ms 2798 +OpenCV_Dilate2D_Constant_Padding/1 0.137 ms 0.137 ms 5116 +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. 
+Saved PNG file. +Saved PNG file. +Saved PNG file. diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json old mode 100755 new mode 100644 index 67855d27..7f40b284 --- a/test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json +++ b/test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-05-26T22:20:03+00:00", + "date": "2025-06-01T10:06:16+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [2.05127,7.65771,8.58301], + "load_avg": [1.12793,1.26172,2.06299], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 139, - "real_time": 5.0038629271786847e+00, - "cpu_time": 5.0033628776978416e+00, + "iterations": 147, + "real_time": 4.7720991151065242e+00, + "cpu_time": 4.7720388095238100e+00, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 95, - "real_time": 7.3803350721534926e+00, - "cpu_time": 7.3799033789473709e+00, + "iterations": 97, + "real_time": 7.1972473956567722e+00, + "cpu_time": 7.1971010103092770e+00, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2239, - "real_time": 3.1316074467413446e-01, - "cpu_time": 3.1314651808843225e-01, + "iterations": 2252, + "real_time": 3.1011967074463465e-01, + "cpu_time": 3.1011620293072806e-01, "time_unit": "ms" }, { @@ -87,9 +87,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 863, - "real_time": 8.1503567386198428e-01, - "cpu_time": 8.1501769061413665e-01, + "iterations": 868, + 
"real_time": 8.0242218913227181e-01, + "cpu_time": 8.0239820852534560e-01, "time_unit": "ms" }, { @@ -101,9 +101,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 548, - "real_time": 1.2776841457090238e+00, - "cpu_time": 1.2776387828467155e+00, + "iterations": 560, + "real_time": 1.2489744295765246e+00, + "cpu_time": 1.2489337464285726e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4797, - "real_time": 1.4566776900085679e-01, - "cpu_time": 1.4565851615593084e-01, + "iterations": 4854, + "real_time": 1.4281208113873500e-01, + "cpu_time": 1.4280516625463535e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2637, - "real_time": 2.6631619319302774e-01, - "cpu_time": 2.6631011452408027e-01, + "iterations": 2649, + "real_time": 2.6116495907711684e-01, + "cpu_time": 2.6115319365798423e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 103585, - "real_time": 6.7529736044738674e-03, - "cpu_time": 6.7528078100110968e-03, + "iterations": 105099, + "real_time": 6.6859832037041806e-03, + "cpu_time": 6.6857738322914631e-03, "time_unit": "ms" }, { @@ -157,9 +157,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 48856, - "real_time": 1.4314036464081647e-02, - "cpu_time": 1.4313678872605214e-02, + "iterations": 49931, + "real_time": 1.4034184800308621e-02, + "cpu_time": 1.4033786765736724e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3141, - "real_time": 2.2273098908196357e-01, - "cpu_time": 2.2271567589939528e-01, + "iterations": 3253, + "real_time": 2.1422339406905452e-01, + "cpu_time": 2.1422094743313869e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3162, - "real_time": 
2.2138736713241733e-01, - "cpu_time": 2.2137922707147351e-01, + "iterations": 3245, + "real_time": 2.1412110670781098e-01, + "cpu_time": 2.1411618335901397e-01, "time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2082, - "real_time": 3.3552730303905426e-01, - "cpu_time": 3.3551034341978869e-01, + "iterations": 2229, + "real_time": 3.1633685711067638e-01, + "cpu_time": 3.1632788515029109e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2093, - "real_time": 3.2841352896290538e-01, - "cpu_time": 3.2839717821309145e-01, + "iterations": 2256, + "real_time": 3.1315658740197300e-01, + "cpu_time": 3.1313880939716321e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 727, - "real_time": 9.1686937061267837e-01, - "cpu_time": 9.1678499724896845e-01, + "iterations": 822, + "real_time": 8.0446341050077241e-01, + "cpu_time": 8.0443746593674081e-01, "time_unit": "ms" }, { @@ -241,9 +241,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 732, - "real_time": 9.1540414962123651e-01, - "cpu_time": 9.1536734972677514e-01, + "iterations": 842, + "real_time": 7.9947983461151217e-01, + "cpu_time": 7.9944346437054481e-01, "time_unit": "ms" }, { @@ -255,9 +255,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5002, - "real_time": 1.3968796960440505e-01, - "cpu_time": 1.3968174870051955e-01, + "iterations": 5153, + "real_time": 1.3539784398329691e-01, + "cpu_time": 1.3539321715505537e-01, "time_unit": "ms" }, { @@ -269,9 +269,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3104, - "real_time": 2.2470024378355785e-01, - "cpu_time": 2.2469779349226782e-01, + "iterations": 3158, + "real_time": 2.1987897889515204e-01, + "cpu_time": 2.1986695028499065e-01, "time_unit": "ms" }, { @@ -283,9 +283,9 @@ "repetitions": 1, 
"repetition_index": 0, "threads": 1, - "iterations": 3100, - "real_time": 2.2293068047973416e-01, - "cpu_time": 2.2292174161290265e-01, + "iterations": 3163, + "real_time": 2.2139365028400343e-01, + "cpu_time": 2.2138768827062899e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2764, - "real_time": 2.5439042700441117e-01, - "cpu_time": 2.5438529232995610e-01, + "iterations": 2727, + "real_time": 2.5654320311734297e-01, + "cpu_time": 2.5653315914924757e-01, "time_unit": "ms" }, { @@ -311,9 +311,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2700, - "real_time": 2.6047515372435254e-01, - "cpu_time": 2.6046163111111165e-01, + "iterations": 2742, + "real_time": 2.5476464549287403e-01, + "cpu_time": 2.5475962800875301e-01, "time_unit": "ms" }, { @@ -325,9 +325,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2725, - "real_time": 2.5558024166374033e-01, - "cpu_time": 2.5557356660550518e-01, + "iterations": 2826, + "real_time": 2.4769082797265440e-01, + "cpu_time": 2.4768630750176840e-01, "time_unit": "ms" }, { @@ -339,9 +339,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4989, - "real_time": 1.3989395721268383e-01, - "cpu_time": 1.3988953577871338e-01, + "iterations": 5148, + "real_time": 1.3564416607545157e-01, + "cpu_time": 1.3563850446775436e-01, "time_unit": "ms" } ] diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log b/test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log new file mode 100644 index 00000000..9b6ab275 --- /dev/null +++ b/test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log @@ -0,0 +1,56 @@ +2025-06-01T10:06:16+00:00 +Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 
32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 1.13, 1.26, 2.06 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 4.77 ms 4.77 ms 147 +MLIR_Conv2D/1 7.20 ms 7.20 ms 97 +Buddy_Conv2D/1 0.310 ms 0.310 ms 2252 +Buddy_Corr2D_Constant_Padding/1 0.802 ms 0.802 ms 868 +OpenCV_Filter2D_Constant_Padding/1 1.25 ms 1.25 ms 560 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.143 ms 0.143 ms 4854 +Buddy_Resize2D_Bilinear_Interpolation/1 0.261 ms 0.261 ms 2649 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 105099 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 49931 +Buddy_Erosion2D_Constant_Padding/1 0.214 ms 0.214 ms 3253 +Buddy_Dilation2D_Constant_Padding/1 0.214 ms 0.214 ms 3245 +Buddy_Opening2D_Constant_Padding/1 0.316 ms 0.316 ms 2229 +Buddy_Closing2D_Constant_Padding/1 0.313 ms 0.313 ms 2256 +Buddy_TopHat2D_Constant_Padding/1 0.804 ms 0.804 ms 822 +Buddy_BottomHat2D_Constant_Padding/1 0.799 ms 0.799 ms 842 +OpenCV_Erode2D_Constant_Padding/1 0.135 ms 0.135 ms 5153 +OpenCV_Opening2D_Constant_Padding/1 0.220 ms 0.220 ms 3158 +OpenCV_Closing2D_Constant_Padding/1 0.221 ms 0.221 ms 3163 +OpenCV_TopHat2D_Constant_Padding/1 0.257 ms 0.257 ms 2727 +OpenCV_BottomHat2D_Constant_Padding/1 0.255 ms 0.255 ms 2742 +OpenCV_MorphGrad2D_Constant_Padding/1 0.248 ms 0.248 ms 2826 +OpenCV_Dilate2D_Constant_Padding/1 0.136 ms 0.136 ms 5148 +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. 
+Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json old mode 100755 new mode 100644 index 6960ba2a..6ff72a1b --- a/test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json +++ b/test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-05-26T22:20:27+00:00", + "date": "2025-06-01T10:06:40+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [2.09033,7.21777,8.41211], + "load_avg": [1.0835,1.23975,2.03369], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 140, - "real_time": 4.9988453675593645e+00, - "cpu_time": 4.9986953928571429e+00, + "iterations": 143, + "real_time": 4.9128080555400651e+00, + "cpu_time": 4.9125261748251745e+00, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 95, - "real_time": 7.3694647730965359e+00, - "cpu_time": 7.3690518526315794e+00, + "iterations": 98, + "real_time": 7.1688128103102953e+00, + "cpu_time": 7.1684497448979574e+00, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2227, - "real_time": 3.1326443532362186e-01, - "cpu_time": 3.1325685136955544e-01, + "iterations": 2260, + "real_time": 3.0970742464461160e-01, + "cpu_time": 3.0970234823008863e-01, "time_unit": "ms" }, { @@ -87,9 +87,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 857, - "real_time": 8.1497179333429215e-01, - "cpu_time": 
8.1495055892648771e-01, + "iterations": 875, + "real_time": 7.9474829563072746e-01, + "cpu_time": 7.9473616228571387e-01, "time_unit": "ms" }, { @@ -101,9 +101,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 548, - "real_time": 1.2764532125833696e+00, - "cpu_time": 1.2764195474452551e+00, + "iterations": 560, + "real_time": 1.2486339812832219e+00, + "cpu_time": 1.2485831892857155e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4786, - "real_time": 1.4544369842011809e-01, - "cpu_time": 1.4543611554534056e-01, + "iterations": 4871, + "real_time": 1.4330328636604078e-01, + "cpu_time": 1.4329935413672748e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2621, - "real_time": 2.6697471229027631e-01, - "cpu_time": 2.6696165433040830e-01, + "iterations": 2651, + "real_time": 2.6097617655576555e-01, + "cpu_time": 2.6096452583930574e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 103139, - "real_time": 6.7890966993477686e-03, - "cpu_time": 6.7888171981500727e-03, + "iterations": 104620, + "real_time": 6.6914885511215544e-03, + "cpu_time": 6.6913837507168832e-03, "time_unit": "ms" }, { @@ -157,9 +157,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 48916, - "real_time": 1.4305655840777915e-02, - "cpu_time": 1.4305205740453016e-02, + "iterations": 49783, + "real_time": 1.4034632352513720e-02, + "cpu_time": 1.4034190526886668e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3055, - "real_time": 2.2843102764478487e-01, - "cpu_time": 2.2842420523731577e-01, + "iterations": 3101, + "real_time": 2.1835735265342160e-01, + "cpu_time": 2.1835113382779719e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, 
- "iterations": 3073, - "real_time": 2.6378849096380458e-01, - "cpu_time": 2.6377914480963227e-01, + "iterations": 3270, + "real_time": 2.1373488703907811e-01, + "cpu_time": 2.1372451253822639e-01, "time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1855, - "real_time": 3.8777429439790168e-01, - "cpu_time": 3.8775405013477082e-01, + "iterations": 2180, + "real_time": 3.1882228346866204e-01, + "cpu_time": 3.1881019403669730e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2033, - "real_time": 3.7358192460354012e-01, - "cpu_time": 3.7357057255287696e-01, + "iterations": 2262, + "real_time": 3.1177783887224003e-01, + "cpu_time": 3.1175983333333290e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 727, - "real_time": 9.1089915479065786e-01, - "cpu_time": 9.1081009628610654e-01, + "iterations": 841, + "real_time": 8.1441226314562820e-01, + "cpu_time": 8.1438480975029681e-01, "time_unit": "ms" }, { @@ -241,9 +241,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 732, - "real_time": 8.9685928809349658e-01, - "cpu_time": 8.9679970081967098e-01, + "iterations": 849, + "real_time": 8.1958065360117571e-01, + "cpu_time": 8.1955526266195466e-01, "time_unit": "ms" }, { @@ -255,9 +255,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5021, - "real_time": 1.3921439745610892e-01, - "cpu_time": 1.3920651204939244e-01, + "iterations": 5157, + "real_time": 1.3543392134447857e-01, + "cpu_time": 1.3543092495636982e-01, "time_unit": "ms" }, { @@ -269,9 +269,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3154, - "real_time": 2.2428087977551930e-01, - "cpu_time": 2.2427543595434415e-01, + "iterations": 3187, + "real_time": 2.1921325934355934e-01, + "cpu_time": 2.1920577282711085e-01, "time_unit": "ms" }, { @@ 
-283,9 +283,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3178, - "real_time": 2.1991694007300971e-01, - "cpu_time": 2.1990786815607272e-01, + "iterations": 3207, + "real_time": 2.1817837297303080e-01, + "cpu_time": 2.1817245837231020e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2759, - "real_time": 2.5378466030530800e-01, - "cpu_time": 2.5377213990576297e-01, + "iterations": 2745, + "real_time": 2.5516484061659794e-01, + "cpu_time": 2.5515719052823399e-01, "time_unit": "ms" }, { @@ -311,9 +311,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2757, - "real_time": 2.5706717695150422e-01, - "cpu_time": 2.5705736452665900e-01, + "iterations": 2766, + "real_time": 2.5253382525578499e-01, + "cpu_time": 2.5252827476500311e-01, "time_unit": "ms" }, { @@ -325,9 +325,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2734, - "real_time": 2.5537159994255476e-01, - "cpu_time": 2.5536476700804656e-01, + "iterations": 2808, + "real_time": 2.4893482221009214e-01, + "cpu_time": 2.4892937749287819e-01, "time_unit": "ms" }, { @@ -339,9 +339,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5010, - "real_time": 1.3908601129899720e-01, - "cpu_time": 1.3908382435129729e-01, + "iterations": 5102, + "real_time": 1.3665569254705739e-01, + "cpu_time": 1.3665094943159542e-01, "time_unit": "ms" } ] diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log b/test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log new file mode 100644 index 00000000..c4b29991 --- /dev/null +++ b/test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log @@ -0,0 +1,56 @@ +2025-06-01T10:06:40+00:00 +Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + 
L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 1.08, 1.24, 2.03 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 4.91 ms 4.91 ms 143 +MLIR_Conv2D/1 7.17 ms 7.17 ms 98 +Buddy_Conv2D/1 0.310 ms 0.310 ms 2260 +Buddy_Corr2D_Constant_Padding/1 0.795 ms 0.795 ms 875 +OpenCV_Filter2D_Constant_Padding/1 1.25 ms 1.25 ms 560 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.143 ms 0.143 ms 4871 +Buddy_Resize2D_Bilinear_Interpolation/1 0.261 ms 0.261 ms 2651 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 104620 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 49783 +Buddy_Erosion2D_Constant_Padding/1 0.218 ms 0.218 ms 3101 +Buddy_Dilation2D_Constant_Padding/1 0.214 ms 0.214 ms 3270 +Buddy_Opening2D_Constant_Padding/1 0.319 ms 0.319 ms 2180 +Buddy_Closing2D_Constant_Padding/1 0.312 ms 0.312 ms 2262 +Buddy_TopHat2D_Constant_Padding/1 0.814 ms 0.814 ms 841 +Buddy_BottomHat2D_Constant_Padding/1 0.820 ms 0.820 ms 849 +OpenCV_Erode2D_Constant_Padding/1 0.135 ms 0.135 ms 5157 +OpenCV_Opening2D_Constant_Padding/1 0.219 ms 0.219 ms 3187 +OpenCV_Closing2D_Constant_Padding/1 0.218 ms 0.218 ms 3207 +OpenCV_TopHat2D_Constant_Padding/1 0.255 ms 0.255 ms 2745 +OpenCV_BottomHat2D_Constant_Padding/1 0.253 ms 0.253 ms 2766 +OpenCV_MorphGrad2D_Constant_Padding/1 0.249 ms 0.249 ms 2808 +OpenCV_Dilate2D_Constant_Padding/1 0.137 ms 0.137 ms 5102 +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. 
+Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json old mode 100755 new mode 100644 index 72e90414..b9088742 --- a/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json +++ b/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-05-26T22:20:51+00:00", + "date": "2025-06-01T10:07:04+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [2.05859,6.79785,8.24023], + "load_avg": [1.0542,1.21924,2.00439], "library_build_type": "release" }, "benchmarks": [ @@ -46,8 +46,8 @@ "repetition_index": 0, "threads": 1, "iterations": 60, - "real_time": 1.1761240319659313e+01, - "cpu_time": 1.1760665933333334e+01, + "real_time": 1.1551547423005104e+01, + "cpu_time": 1.1550767250000002e+01, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 23, - "real_time": 2.9861296891518261e+01, - "cpu_time": 2.9859531086956522e+01, + "iterations": 24, + "real_time": 2.9155427357181907e+01, + "cpu_time": 2.9154965874999991e+01, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 524, - "real_time": 1.2968991285915139e+00, - "cpu_time": 1.2968563473282440e+00, + "iterations": 536, + "real_time": 1.3085197372389819e+00, + "cpu_time": 1.3084801436567159e+00, "time_unit": "ms" }, { @@ -87,9 +87,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 294, - "real_time": 2.3691147109683679e+00, - "cpu_time": 2.3690150918367365e+00, 
+ "iterations": 300, + "real_time": 2.3306774348020554e+00, + "cpu_time": 2.3306083266666664e+00, "time_unit": "ms" }, { @@ -101,9 +101,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 166, - "real_time": 4.1967903499502732e+00, - "cpu_time": 4.1966355481927673e+00, + "iterations": 170, + "real_time": 4.1107838863835617e+00, + "cpu_time": 4.1106729823529387e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4766, - "real_time": 1.4561849387938666e-01, - "cpu_time": 1.4560848174569879e-01, + "iterations": 4854, + "real_time": 1.4268877356937504e-01, + "cpu_time": 1.4268450350226619e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2632, - "real_time": 2.6634444856285866e-01, - "cpu_time": 2.6633077697568358e-01, + "iterations": 2689, + "real_time": 2.6141061780398644e-01, + "cpu_time": 2.6139920379323189e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 102992, - "real_time": 6.7888280357693833e-03, - "cpu_time": 6.7885894729687781e-03, + "iterations": 105080, + "real_time": 6.6532106587232909e-03, + "cpu_time": 6.6528616768176665e-03, "time_unit": "ms" }, { @@ -157,9 +157,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 48914, - "real_time": 1.4305232184815593e-02, - "cpu_time": 1.4304699186327010e-02, + "iterations": 49721, + "real_time": 1.4036623706550731e-02, + "cpu_time": 1.4035942217574073e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3110, - "real_time": 2.2340725712070894e-01, - "cpu_time": 2.2339657749196118e-01, + "iterations": 3235, + "real_time": 2.1639395004262144e-01, + "cpu_time": 2.1638765935084969e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3094, - 
"real_time": 2.2614223890984220e-01, - "cpu_time": 2.2612052133160956e-01, + "iterations": 3260, + "real_time": 2.1409400641643928e-01, + "cpu_time": 2.1408867822085853e-01, "time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1992, - "real_time": 3.5395631080411044e-01, - "cpu_time": 3.5392976907630508e-01, + "iterations": 2249, + "real_time": 3.0992835057794171e-01, + "cpu_time": 3.0992358070253456e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1972, - "real_time": 3.4787828522086384e-01, - "cpu_time": 3.4785186156186626e-01, + "iterations": 2139, + "real_time": 3.1172140541061055e-01, + "cpu_time": 3.1170241748480609e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 731, - "real_time": 9.0749642305922085e-01, - "cpu_time": 9.0745977838577285e-01, + "iterations": 826, + "real_time": 7.8035605570738886e-01, + "cpu_time": 7.8032884140435832e-01, "time_unit": "ms" }, { @@ -241,9 +241,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 740, - "real_time": 9.0477319837019254e-01, - "cpu_time": 9.0473099999999862e-01, + "iterations": 830, + "real_time": 7.8195727104882162e-01, + "cpu_time": 7.8193199518072320e-01, "time_unit": "ms" }, { @@ -255,9 +255,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5054, - "real_time": 1.3808638364560150e-01, - "cpu_time": 1.3808249604273823e-01, + "iterations": 5049, + "real_time": 1.3847981230424972e-01, + "cpu_time": 1.3847714319667237e-01, "time_unit": "ms" }, { @@ -269,9 +269,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3141, - "real_time": 2.2523653856871501e-01, - "cpu_time": 2.2523184909264546e-01, + "iterations": 3095, + "real_time": 2.2573410586028184e-01, + "cpu_time": 2.2572844491114658e-01, "time_unit": "ms" }, { @@ -283,9 +283,9 @@ 
"repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3119, - "real_time": 2.2439105939639611e-01, - "cpu_time": 2.2438301987816528e-01, + "iterations": 3109, + "real_time": 2.2543651156963815e-01, + "cpu_time": 2.2543004728208380e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2692, - "real_time": 2.8906816154452702e-01, - "cpu_time": 2.8906200445765301e-01, + "iterations": 2690, + "real_time": 2.6003630437150765e-01, + "cpu_time": 2.6002171301115301e-01, "time_unit": "ms" }, { @@ -311,9 +311,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2695, - "real_time": 2.6125356983497106e-01, - "cpu_time": 2.6124337476808834e-01, + "iterations": 2688, + "real_time": 2.6027442418992341e-01, + "cpu_time": 2.6026942299107153e-01, "time_unit": "ms" }, { @@ -325,9 +325,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2680, - "real_time": 2.6039684177445832e-01, - "cpu_time": 2.6038943208955156e-01, + "iterations": 2759, + "real_time": 2.5280932354058594e-01, + "cpu_time": 2.5280226567597014e-01, "time_unit": "ms" }, { @@ -339,9 +339,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5013, - "real_time": 1.3913510936880311e-01, - "cpu_time": 1.3912800039896264e-01, + "iterations": 5116, + "real_time": 1.3649669214442336e-01, + "cpu_time": 1.3649272341673235e-01, "time_unit": "ms" } ] diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log b/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log new file mode 100644 index 00000000..3e54ca71 --- /dev/null +++ b/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log @@ -0,0 +1,56 @@ +2025-06-01T10:07:04+00:00 +Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB 
(x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 1.05, 1.22, 2.00 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 11.6 ms 11.6 ms 60 +MLIR_Conv2D/1 29.2 ms 29.2 ms 24 +Buddy_Conv2D/1 1.31 ms 1.31 ms 536 +Buddy_Corr2D_Constant_Padding/1 2.33 ms 2.33 ms 300 +OpenCV_Filter2D_Constant_Padding/1 4.11 ms 4.11 ms 170 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.143 ms 0.143 ms 4854 +Buddy_Resize2D_Bilinear_Interpolation/1 0.261 ms 0.261 ms 2689 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 105080 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 49721 +Buddy_Erosion2D_Constant_Padding/1 0.216 ms 0.216 ms 3235 +Buddy_Dilation2D_Constant_Padding/1 0.214 ms 0.214 ms 3260 +Buddy_Opening2D_Constant_Padding/1 0.310 ms 0.310 ms 2249 +Buddy_Closing2D_Constant_Padding/1 0.312 ms 0.312 ms 2139 +Buddy_TopHat2D_Constant_Padding/1 0.780 ms 0.780 ms 826 +Buddy_BottomHat2D_Constant_Padding/1 0.782 ms 0.782 ms 830 +OpenCV_Erode2D_Constant_Padding/1 0.138 ms 0.138 ms 5049 +OpenCV_Opening2D_Constant_Padding/1 0.226 ms 0.226 ms 3095 +OpenCV_Closing2D_Constant_Padding/1 0.225 ms 0.225 ms 3109 +OpenCV_TopHat2D_Constant_Padding/1 0.260 ms 0.260 ms 2690 +OpenCV_BottomHat2D_Constant_Padding/1 0.260 ms 0.260 ms 2688 +OpenCV_MorphGrad2D_Constant_Padding/1 0.253 ms 0.253 ms 2759 +OpenCV_Dilate2D_Constant_Padding/1 0.136 ms 0.136 ms 5116 +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. 
+Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json old mode 100755 new mode 100644 index 6ffdee9c..9063bd22 --- a/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json +++ b/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-05-26T22:21:15+00:00", + "date": "2025-06-01T10:07:28+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [2.0376,6.41113,8.07324], + "load_avg": [1.03467,1.2002,1.97705], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 59, - "real_time": 1.1810072113649319e+01, - "cpu_time": 1.1809691406779663e+01, + "iterations": 60, + "real_time": 1.1487875661502281e+01, + "cpu_time": 1.1487358733333334e+01, "time_unit": "ms" }, { @@ -60,8 +60,8 @@ "repetition_index": 0, "threads": 1, "iterations": 24, - "real_time": 2.9850440565496683e+01, - "cpu_time": 2.9848709291666665e+01, + "real_time": 2.9097553264970582e+01, + "cpu_time": 2.9096759458333334e+01, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 542, - "real_time": 1.3058845338676248e+00, - "cpu_time": 1.3058292250922505e+00, + "iterations": 508, + "real_time": 1.3769478097147361e+00, + "cpu_time": 1.3769043287401574e+00, "time_unit": "ms" }, { @@ -87,9 +87,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 293, - "real_time": 2.3760891967666029e+00, - "cpu_time": 2.3760385187713329e+00, + 
"iterations": 301, + "real_time": 2.3193448484934049e+00, + "cpu_time": 2.3192444285714293e+00, "time_unit": "ms" }, { @@ -101,9 +101,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 166, - "real_time": 4.2043444307812727e+00, - "cpu_time": 4.2041359457831291e+00, + "iterations": 170, + "real_time": 4.1048574535285729e+00, + "cpu_time": 4.1047252352941150e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4777, - "real_time": 1.4558693665838751e-01, - "cpu_time": 1.4558089763449858e-01, + "iterations": 4857, + "real_time": 1.4277608228768002e-01, + "cpu_time": 1.4277288326127241e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2635, - "real_time": 2.6631031602451427e-01, - "cpu_time": 2.6628476432637554e-01, + "iterations": 2686, + "real_time": 2.6132201128212801e-01, + "cpu_time": 2.6131442181682785e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 102983, - "real_time": 6.7953448704826435e-03, - "cpu_time": 6.7950746045463790e-03, + "iterations": 105064, + "real_time": 6.6562829248287755e-03, + "cpu_time": 6.6561413709738835e-03, "time_unit": "ms" }, { @@ -157,9 +157,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 48881, - "real_time": 1.4309612515319918e-02, - "cpu_time": 1.4309047564493374e-02, + "iterations": 49925, + "real_time": 1.4008483739871052e-02, + "cpu_time": 1.4008090896344512e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3156, - "real_time": 2.2085438776378849e-01, - "cpu_time": 2.2084482160963267e-01, + "iterations": 3267, + "real_time": 2.1407247116469375e-01, + "cpu_time": 2.1406563881236618e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3159, - 
"real_time": 2.2232111032796306e-01, - "cpu_time": 2.2231153276353291e-01, + "iterations": 3269, + "real_time": 2.1407944222917322e-01, + "cpu_time": 2.1407154512083224e-01, "time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2113, - "real_time": 3.3031660275356611e-01, - "cpu_time": 3.3030707903454742e-01, + "iterations": 2235, + "real_time": 3.1582788383000648e-01, + "cpu_time": 3.1582353601789687e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2087, - "real_time": 3.3365085249776527e-01, - "cpu_time": 3.3363314374700537e-01, + "iterations": 2209, + "real_time": 3.1493820297253949e-01, + "cpu_time": 3.1491848709823395e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 736, - "real_time": 8.7698101339375845e-01, - "cpu_time": 8.7694497826086870e-01, + "iterations": 841, + "real_time": 8.0088509399528029e-01, + "cpu_time": 8.0083224970273426e-01, "time_unit": "ms" }, { @@ -241,9 +241,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 740, - "real_time": 8.9143257638489881e-01, - "cpu_time": 8.9139059729729575e-01, + "iterations": 852, + "real_time": 7.8501773368514760e-01, + "cpu_time": 7.8499030399061098e-01, "time_unit": "ms" }, { @@ -255,9 +255,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5026, - "real_time": 1.3946750417345793e-01, - "cpu_time": 1.3946417011539988e-01, + "iterations": 5129, + "real_time": 1.3646853262975123e-01, + "cpu_time": 1.3646564866445693e-01, "time_unit": "ms" }, { @@ -269,9 +269,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3102, - "real_time": 2.2649793296591225e-01, - "cpu_time": 2.2648724597034239e-01, + "iterations": 3105, + "real_time": 2.2545198155582047e-01, + "cpu_time": 2.2544327149758506e-01, "time_unit": "ms" }, { @@ -283,9 +283,9 @@ 
"repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3074, - "real_time": 2.2739966605379741e-01, - "cpu_time": 2.2739106668835357e-01, + "iterations": 3082, + "real_time": 2.2723694984051099e-01, + "cpu_time": 2.2723045360155805e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2684, - "real_time": 2.5760428355409920e-01, - "cpu_time": 2.5759685879284627e-01, + "iterations": 2679, + "real_time": 2.6105903089046478e-01, + "cpu_time": 2.6105058268010423e-01, "time_unit": "ms" }, { @@ -311,9 +311,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2660, - "real_time": 2.6262844877695679e-01, - "cpu_time": 2.6261966090225614e-01, + "iterations": 2672, + "real_time": 2.6178982354209807e-01, + "cpu_time": 2.6178556474550924e-01, "time_unit": "ms" }, { @@ -325,9 +325,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2685, - "real_time": 2.6083636883250827e-01, - "cpu_time": 2.6082749199255079e-01, + "iterations": 2751, + "real_time": 2.5383601799981026e-01, + "cpu_time": 2.5382809596510303e-01, "time_unit": "ms" }, { @@ -339,9 +339,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4970, - "real_time": 1.4014078053070506e-01, - "cpu_time": 1.4013356338028188e-01, + "iterations": 5094, + "real_time": 1.3636100326431091e-01, + "cpu_time": 1.3635804652532396e-01, "time_unit": "ms" } ] diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log b/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log new file mode 100644 index 00000000..0ea453df --- /dev/null +++ b/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log @@ -0,0 +1,56 @@ +2025-06-01T10:07:28+00:00 +Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB 
(x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 1.03, 1.20, 1.98 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 11.5 ms 11.5 ms 60 +MLIR_Conv2D/1 29.1 ms 29.1 ms 24 +Buddy_Conv2D/1 1.38 ms 1.38 ms 508 +Buddy_Corr2D_Constant_Padding/1 2.32 ms 2.32 ms 301 +OpenCV_Filter2D_Constant_Padding/1 4.10 ms 4.10 ms 170 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.143 ms 0.143 ms 4857 +Buddy_Resize2D_Bilinear_Interpolation/1 0.261 ms 0.261 ms 2686 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 105064 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 49925 +Buddy_Erosion2D_Constant_Padding/1 0.214 ms 0.214 ms 3267 +Buddy_Dilation2D_Constant_Padding/1 0.214 ms 0.214 ms 3269 +Buddy_Opening2D_Constant_Padding/1 0.316 ms 0.316 ms 2235 +Buddy_Closing2D_Constant_Padding/1 0.315 ms 0.315 ms 2209 +Buddy_TopHat2D_Constant_Padding/1 0.801 ms 0.801 ms 841 +Buddy_BottomHat2D_Constant_Padding/1 0.785 ms 0.785 ms 852 +OpenCV_Erode2D_Constant_Padding/1 0.136 ms 0.136 ms 5129 +OpenCV_Opening2D_Constant_Padding/1 0.225 ms 0.225 ms 3105 +OpenCV_Closing2D_Constant_Padding/1 0.227 ms 0.227 ms 3082 +OpenCV_TopHat2D_Constant_Padding/1 0.261 ms 0.261 ms 2679 +OpenCV_BottomHat2D_Constant_Padding/1 0.262 ms 0.262 ms 2672 +OpenCV_MorphGrad2D_Constant_Padding/1 0.254 ms 0.254 ms 2751 +OpenCV_Dilate2D_Constant_Padding/1 0.136 ms 0.136 ms 5094 +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. 
+Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json old mode 100755 new mode 100644 index 635d3b03..2ed991ba --- a/test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json +++ b/test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-05-26T22:21:40+00:00", + "date": "2025-06-01T10:07:52+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [2.02344,6.05566,7.91064], + "load_avg": [1.02393,1.18652,1.95557], "library_build_type": "release" }, "benchmarks": [ @@ -46,8 +46,8 @@ "repetition_index": 0, "threads": 1, "iterations": 31, - "real_time": 2.2180669430282808e+01, - "cpu_time": 2.2179082290322576e+01, + "real_time": 2.1483107620189266e+01, + "cpu_time": 2.1482540354838711e+01, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 10, - "real_time": 6.8137688748538494e+01, - "cpu_time": 6.8135333700000004e+01, + "iterations": 11, + "real_time": 6.6662094640460879e+01, + "cpu_time": 6.6661596272727294e+01, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 300, - "real_time": 2.3162953307231269e+00, - "cpu_time": 2.3161617966666670e+00, + "iterations": 312, + "real_time": 2.2404276921103397e+00, + "cpu_time": 2.2403709903846161e+00, "time_unit": "ms" }, { @@ -87,9 +87,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 148, - "real_time": 4.7046484526347472e+00, - "cpu_time": 4.7044905743243222e+00, + "iterations": 
150, + "real_time": 4.6722977980971336e+00, + "cpu_time": 4.6722623466666642e+00, "time_unit": "ms" }, { @@ -101,9 +101,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 79, - "real_time": 8.8026538915649244e+00, - "cpu_time": 8.8021716329113904e+00, + "iterations": 81, + "real_time": 8.6062704615386920e+00, + "cpu_time": 8.6059275802469042e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4765, - "real_time": 1.4569725829556757e-01, - "cpu_time": 1.4568953284365163e-01, + "iterations": 4847, + "real_time": 1.4253131920759618e-01, + "cpu_time": 1.4252792098205075e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2606, - "real_time": 2.6624611809746812e-01, - "cpu_time": 2.6623398656945507e-01, + "iterations": 2686, + "real_time": 2.6136482784930931e-01, + "cpu_time": 2.6135366939687277e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 103232, - "real_time": 6.7792688108119200e-03, - "cpu_time": 6.7791017320210810e-03, + "iterations": 105200, + "real_time": 6.6543605086524680e-03, + "cpu_time": 6.6542239543726190e-03, "time_unit": "ms" }, { @@ -157,9 +157,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 48866, - "real_time": 1.4305006073369965e-02, - "cpu_time": 1.4304623807964635e-02, + "iterations": 49717, + "real_time": 1.4051802086674765e-02, + "cpu_time": 1.4051148580968290e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3108, - "real_time": 2.2251177711011974e-01, - "cpu_time": 2.2250371010296024e-01, + "iterations": 3275, + "real_time": 2.1293145497791641e-01, + "cpu_time": 2.1291816183206122e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3124, - "real_time": 
2.2213796699817903e-01, - "cpu_time": 2.2213028777208693e-01, + "iterations": 3279, + "real_time": 2.1357018615313150e-01, + "cpu_time": 2.1355237785910328e-01, "time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1961, - "real_time": 3.3011947117825663e-01, - "cpu_time": 3.3010722947475762e-01, + "iterations": 2249, + "real_time": 3.0966895674270434e-01, + "cpu_time": 3.0966282303245918e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2094, - "real_time": 3.4953010870711282e-01, - "cpu_time": 3.4951667430754529e-01, + "iterations": 2267, + "real_time": 3.1410488470514597e-01, + "cpu_time": 3.1408243140714548e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 764, - "real_time": 8.9931203764302570e-01, - "cpu_time": 8.9927609816753939e-01, + "iterations": 827, + "real_time": 7.8919717755974994e-01, + "cpu_time": 7.8917250060459587e-01, "time_unit": "ms" }, { @@ -241,9 +241,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 784, - "real_time": 8.8094642186271288e-01, - "cpu_time": 8.8090844642857091e-01, + "iterations": 845, + "real_time": 7.6306458099706642e-01, + "cpu_time": 7.6302103786982345e-01, "time_unit": "ms" }, { @@ -255,9 +255,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4984, - "real_time": 1.4027801826656056e-01, - "cpu_time": 1.4027250120385235e-01, + "iterations": 5188, + "real_time": 1.3468034979050769e-01, + "cpu_time": 1.3467689070932889e-01, "time_unit": "ms" }, { @@ -269,9 +269,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3103, - "real_time": 2.2453489277272926e-01, - "cpu_time": 2.2452527747341289e-01, + "iterations": 3054, + "real_time": 2.2879175807653726e-01, + "cpu_time": 2.2878261100196512e-01, "time_unit": "ms" }, { @@ -283,9 +283,9 @@ "repetitions": 1, 
"repetition_index": 0, "threads": 1, - "iterations": 3089, - "real_time": 2.2432715479765714e-01, - "cpu_time": 2.2432135351246352e-01, + "iterations": 3052, + "real_time": 2.2905985098503051e-01, + "cpu_time": 2.2905445674967184e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2707, - "real_time": 2.5748399006822781e-01, - "cpu_time": 2.5747665644625017e-01, + "iterations": 2667, + "real_time": 2.6218339879055974e-01, + "cpu_time": 2.6217200074990588e-01, "time_unit": "ms" }, { @@ -311,9 +311,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2743, - "real_time": 2.5845320051250081e-01, - "cpu_time": 2.5844790521327066e-01, + "iterations": 2674, + "real_time": 2.6174741828495118e-01, + "cpu_time": 2.6173521727748611e-01, "time_unit": "ms" }, { @@ -325,9 +325,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2695, - "real_time": 2.6067187923226154e-01, - "cpu_time": 2.6066210909090842e-01, + "iterations": 2759, + "real_time": 2.5373818531724240e-01, + "cpu_time": 2.5372534541500574e-01, "time_unit": "ms" }, { @@ -339,9 +339,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4997, - "real_time": 1.3992553683061562e-01, - "cpu_time": 1.3991992815689469e-01, + "iterations": 5114, + "real_time": 1.3672967110299766e-01, + "cpu_time": 1.3672548572545917e-01, "time_unit": "ms" } ] diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log b/test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log new file mode 100644 index 00000000..eb8ab690 --- /dev/null +++ b/test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log @@ -0,0 +1,56 @@ +2025-06-01T10:07:52+00:00 +Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 
32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 1.02, 1.19, 1.96 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 21.5 ms 21.5 ms 31 +MLIR_Conv2D/1 66.7 ms 66.7 ms 11 +Buddy_Conv2D/1 2.24 ms 2.24 ms 312 +Buddy_Corr2D_Constant_Padding/1 4.67 ms 4.67 ms 150 +OpenCV_Filter2D_Constant_Padding/1 8.61 ms 8.61 ms 81 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.143 ms 0.143 ms 4847 +Buddy_Resize2D_Bilinear_Interpolation/1 0.261 ms 0.261 ms 2686 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 105200 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 49717 +Buddy_Erosion2D_Constant_Padding/1 0.213 ms 0.213 ms 3275 +Buddy_Dilation2D_Constant_Padding/1 0.214 ms 0.214 ms 3279 +Buddy_Opening2D_Constant_Padding/1 0.310 ms 0.310 ms 2249 +Buddy_Closing2D_Constant_Padding/1 0.314 ms 0.314 ms 2267 +Buddy_TopHat2D_Constant_Padding/1 0.789 ms 0.789 ms 827 +Buddy_BottomHat2D_Constant_Padding/1 0.763 ms 0.763 ms 845 +OpenCV_Erode2D_Constant_Padding/1 0.135 ms 0.135 ms 5188 +OpenCV_Opening2D_Constant_Padding/1 0.229 ms 0.229 ms 3054 +OpenCV_Closing2D_Constant_Padding/1 0.229 ms 0.229 ms 3052 +OpenCV_TopHat2D_Constant_Padding/1 0.262 ms 0.262 ms 2667 +OpenCV_BottomHat2D_Constant_Padding/1 0.262 ms 0.262 ms 2674 +OpenCV_MorphGrad2D_Constant_Padding/1 0.254 ms 0.254 ms 2759 +OpenCV_Dilate2D_Constant_Padding/1 0.137 ms 0.137 ms 5114 +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. 
+Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json old mode 100755 new mode 100644 index 8f78eddd..6b3c7804 --- a/test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json +++ b/test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-05-26T22:22:04+00:00", + "date": "2025-06-01T10:08:16+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [2.01465,5.729,7.75244], + "load_avg": [1.01514,1.17041,1.92871], "library_build_type": "release" }, "benchmarks": [ @@ -46,8 +46,8 @@ "repetition_index": 0, "threads": 1, "iterations": 32, - "real_time": 2.1972846647258848e+01, - "cpu_time": 2.1971798968749997e+01, + "real_time": 2.1583709167316556e+01, + "cpu_time": 2.1582785937499999e+01, "time_unit": "ms" }, { @@ -60,8 +60,8 @@ "repetition_index": 0, "threads": 1, "iterations": 10, - "real_time": 6.8126448430120945e+01, - "cpu_time": 6.8123335000000012e+01, + "real_time": 6.6664919815957546e+01, + "cpu_time": 6.6663032099999995e+01, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 304, - "real_time": 2.2191465893564257e+00, - "cpu_time": 2.2190450559210526e+00, + "iterations": 299, + "real_time": 2.3427403229833845e+00, + "cpu_time": 2.3426287826086951e+00, "time_unit": "ms" }, { @@ -87,9 +87,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 149, - "real_time": 4.6995133036535055e+00, - "cpu_time": 4.6994745369127537e+00, + "iterations": 150, + "real_time": 4.6730378891030950e+00, + "cpu_time": 
4.6729712266666672e+00, "time_unit": "ms" }, { @@ -101,9 +101,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 79, - "real_time": 8.7998903156081330e+00, - "cpu_time": 8.7994201518987403e+00, + "iterations": 81, + "real_time": 8.5960494837275263e+00, + "cpu_time": 8.5955845432098688e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4719, - "real_time": 1.4579868286336523e-01, - "cpu_time": 1.4579433396906127e-01, + "iterations": 4853, + "real_time": 1.4254460892320145e-01, + "cpu_time": 1.4253487471667009e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2629, - "real_time": 2.6617135984356205e-01, - "cpu_time": 2.6616194903004953e-01, + "iterations": 2693, + "real_time": 2.6023447322681675e-01, + "cpu_time": 2.6022185480876342e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 102880, - "real_time": 6.7913589396544419e-03, - "cpu_time": 6.7911381706842900e-03, + "iterations": 105142, + "real_time": 6.6607927815345129e-03, + "cpu_time": 6.6606426451846111e-03, "time_unit": "ms" }, { @@ -157,9 +157,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 48902, - "real_time": 1.4305065534660009e-02, - "cpu_time": 1.4304335098768956e-02, + "iterations": 50003, + "real_time": 1.4013960396383099e-02, + "cpu_time": 1.4013452032878017e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3148, - "real_time": 2.2171691635190455e-01, - "cpu_time": 2.2170603208386286e-01, + "iterations": 3229, + "real_time": 2.1446620847611059e-01, + "cpu_time": 2.1445776308454609e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3112, - "real_time": 2.2349217224262827e-01, - "cpu_time": 2.2347660314910001e-01, 
+ "iterations": 3262, + "real_time": 2.1735038653993666e-01, + "cpu_time": 2.1733955395462870e-01, "time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1703, - "real_time": 4.0066563012980744e-01, - "cpu_time": 4.0064513623018178e-01, + "iterations": 2262, + "real_time": 3.0764156028588824e-01, + "cpu_time": 3.0763562245800136e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1933, - "real_time": 3.7044749353842232e-01, - "cpu_time": 3.7041857320227567e-01, + "iterations": 2236, + "real_time": 3.0961510690332311e-01, + "cpu_time": 3.0960087119856816e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 724, - "real_time": 9.4048063224438805e-01, - "cpu_time": 9.4042722513812205e-01, + "iterations": 855, + "real_time": 7.7676248480702004e-01, + "cpu_time": 7.7674190058479520e-01, "time_unit": "ms" }, { @@ -241,9 +241,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 704, - "real_time": 9.0413066209293902e-01, - "cpu_time": 9.0407184517045247e-01, + "iterations": 826, + "real_time": 7.9636755636182877e-01, + "cpu_time": 7.9630684140435715e-01, "time_unit": "ms" }, { @@ -255,9 +255,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5045, - "real_time": 1.3871030075285432e-01, - "cpu_time": 1.3870866759167522e-01, + "iterations": 5125, + "real_time": 1.3618563297318248e-01, + "cpu_time": 1.3618100897560950e-01, "time_unit": "ms" }, { @@ -269,9 +269,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3039, - "real_time": 2.2786769644529187e-01, - "cpu_time": 2.2785653142481077e-01, + "iterations": 3079, + "real_time": 2.2738466971529966e-01, + "cpu_time": 2.2736882591750601e-01, "time_unit": "ms" }, { @@ -283,9 +283,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3127, - 
"real_time": 2.2779202173893773e-01, - "cpu_time": 2.2778226670930599e-01, + "iterations": 3097, + "real_time": 2.2597094918975608e-01, + "cpu_time": 2.2596501485308346e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2665, - "real_time": 2.6074036131097494e-01, - "cpu_time": 2.6072306228893072e-01, + "iterations": 2680, + "real_time": 2.6094424677317712e-01, + "cpu_time": 2.6093131977611894e-01, "time_unit": "ms" }, { @@ -311,9 +311,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2664, - "real_time": 2.6070837815244635e-01, - "cpu_time": 2.6070265427927913e-01, + "iterations": 2694, + "real_time": 2.5972596120086527e-01, + "cpu_time": 2.5972145916852268e-01, "time_unit": "ms" }, { @@ -325,9 +325,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2666, - "real_time": 2.6246782640988719e-01, - "cpu_time": 2.6245429219804978e-01, + "iterations": 2766, + "real_time": 2.5288170083700018e-01, + "cpu_time": 2.5287643926247261e-01, "time_unit": "ms" }, { @@ -339,9 +339,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4964, - "real_time": 1.4095451220521804e-01, - "cpu_time": 1.4094873912167652e-01, + "iterations": 4993, + "real_time": 1.3998398830005837e-01, + "cpu_time": 1.3997966553174421e-01, "time_unit": "ms" } ] diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log b/test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log new file mode 100644 index 00000000..0b7b2543 --- /dev/null +++ b/test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log @@ -0,0 +1,56 @@ +2025-06-01T10:08:16+00:00 +Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 
30720 KiB (x1) +Load Average: 1.02, 1.17, 1.93 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 21.6 ms 21.6 ms 32 +MLIR_Conv2D/1 66.7 ms 66.7 ms 10 +Buddy_Conv2D/1 2.34 ms 2.34 ms 299 +Buddy_Corr2D_Constant_Padding/1 4.67 ms 4.67 ms 150 +OpenCV_Filter2D_Constant_Padding/1 8.60 ms 8.60 ms 81 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.143 ms 0.143 ms 4853 +Buddy_Resize2D_Bilinear_Interpolation/1 0.260 ms 0.260 ms 2693 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 105142 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 50003 +Buddy_Erosion2D_Constant_Padding/1 0.214 ms 0.214 ms 3229 +Buddy_Dilation2D_Constant_Padding/1 0.217 ms 0.217 ms 3262 +Buddy_Opening2D_Constant_Padding/1 0.308 ms 0.308 ms 2262 +Buddy_Closing2D_Constant_Padding/1 0.310 ms 0.310 ms 2236 +Buddy_TopHat2D_Constant_Padding/1 0.777 ms 0.777 ms 855 +Buddy_BottomHat2D_Constant_Padding/1 0.796 ms 0.796 ms 826 +OpenCV_Erode2D_Constant_Padding/1 0.136 ms 0.136 ms 5125 +OpenCV_Opening2D_Constant_Padding/1 0.227 ms 0.227 ms 3079 +OpenCV_Closing2D_Constant_Padding/1 0.226 ms 0.226 ms 3097 +OpenCV_TopHat2D_Constant_Padding/1 0.261 ms 0.261 ms 2680 +OpenCV_BottomHat2D_Constant_Padding/1 0.260 ms 0.260 ms 2694 +OpenCV_MorphGrad2D_Constant_Padding/1 0.253 ms 0.253 ms 2766 +OpenCV_Dilate2D_Constant_Padding/1 0.140 ms 0.140 ms 4993 +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. 
+Saved PNG file. diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json old mode 100755 new mode 100644 index 3b314876..524f2941 --- a/test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json +++ b/test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-05-26T22:22:28+00:00", + "date": "2025-06-01T10:08:40+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [2.00977,5.48682,7.62891], + "load_avg": [1.00879,1.15576,1.90234], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 20, - "real_time": 3.5165282245725393e+01, - "cpu_time": 3.5164104549999998e+01, + "iterations": 21, + "real_time": 3.4179400918739184e+01, + "cpu_time": 3.4179150904761904e+01, "time_unit": "ms" }, { @@ -60,8 +60,8 @@ "repetition_index": 0, "threads": 1, "iterations": 6, - "real_time": 1.2213348659376304e+02, - "cpu_time": 1.2212578466666663e+02, + "real_time": 1.1945824356128772e+02, + "cpu_time": 1.1945671849999998e+02, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 167, - "real_time": 4.2077098376379753e+00, - "cpu_time": 4.2074904550898209e+00, + "iterations": 179, + "real_time": 3.9118556045620134e+00, + "cpu_time": 3.9117745865921782e+00, "time_unit": "ms" }, { @@ -87,9 +87,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 89, - "real_time": 7.8985005868284892e+00, - "cpu_time": 7.8981263483146078e+00, + "iterations": 90, + "real_time": 7.7935500484373836e+00, + "cpu_time": 7.7934364222222205e+00, 
"time_unit": "ms" }, { @@ -101,9 +101,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 118, - "real_time": 5.9179596729197748e+00, - "cpu_time": 5.9176466271186516e+00, + "iterations": 119, + "real_time": 5.8913991450011229e+00, + "cpu_time": 5.8910765714285711e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4760, - "real_time": 1.4632203725769238e-01, - "cpu_time": 1.4631159096638646e-01, + "iterations": 4837, + "real_time": 1.4248181189144105e-01, + "cpu_time": 1.4247768864998964e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2636, - "real_time": 2.6651023998743484e-01, - "cpu_time": 2.6649754059180569e-01, + "iterations": 2692, + "real_time": 2.6031898093834677e-01, + "cpu_time": 2.6030977674591388e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 103065, - "real_time": 6.8117591128138560e-03, - "cpu_time": 6.8116361713481842e-03, + "iterations": 105099, + "real_time": 6.6508404265951922e-03, + "cpu_time": 6.6507078278575452e-03, "time_unit": "ms" }, { @@ -157,9 +157,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 47744, - "real_time": 1.4486571433067561e-02, - "cpu_time": 1.4486011121816375e-02, + "iterations": 49521, + "real_time": 1.4164025459980720e-02, + "cpu_time": 1.4163677369196903e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3166, - "real_time": 2.2039369668809206e-01, - "cpu_time": 2.2038528837650059e-01, + "iterations": 3257, + "real_time": 2.1447988594734929e-01, + "cpu_time": 2.1447408136321755e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3164, - "real_time": 2.2002138955666956e-01, - "cpu_time": 2.2001238084702895e-01, + "iterations": 3222, 
+ "real_time": 2.1550155167880078e-01, + "cpu_time": 2.1549107014276825e-01, "time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2083, - "real_time": 3.2940170711127259e-01, - "cpu_time": 3.2938720835333585e-01, + "iterations": 2228, + "real_time": 3.2929575264694660e-01, + "cpu_time": 3.2929014048473909e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2139, - "real_time": 3.3093405893580163e-01, - "cpu_time": 3.3092209537166950e-01, + "iterations": 2221, + "real_time": 3.1350365177783168e-01, + "cpu_time": 3.1348973390364687e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 739, - "real_time": 8.7742192143919662e-01, - "cpu_time": 8.7732872395128714e-01, + "iterations": 845, + "real_time": 7.8893084321501694e-01, + "cpu_time": 7.8890399999999949e-01, "time_unit": "ms" }, { @@ -241,9 +241,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 755, - "real_time": 8.6296645566722419e-01, - "cpu_time": 8.6293647549668850e-01, + "iterations": 825, + "real_time": 7.9270076571088843e-01, + "cpu_time": 7.9265848606060751e-01, "time_unit": "ms" }, { @@ -255,9 +255,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5029, - "real_time": 1.3867238168688933e-01, - "cpu_time": 1.3867003002585018e-01, + "iterations": 5117, + "real_time": 1.3651657002860268e-01, + "cpu_time": 1.3651407563025220e-01, "time_unit": "ms" }, { @@ -269,9 +269,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3140, - "real_time": 2.1959820489405066e-01, - "cpu_time": 2.1958653949044596e-01, + "iterations": 3176, + "real_time": 2.2036766208783354e-01, + "cpu_time": 2.2036350566750670e-01, "time_unit": "ms" }, { @@ -283,9 +283,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3204, - "real_time": 
2.2206443551458074e-01, - "cpu_time": 2.2205557740324600e-01, + "iterations": 3179, + "real_time": 2.1992300025657049e-01, + "cpu_time": 2.1991539792387513e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2744, - "real_time": 2.5502688172694199e-01, - "cpu_time": 2.5501861734693820e-01, + "iterations": 2758, + "real_time": 2.5396899977311266e-01, + "cpu_time": 2.5396026613487976e-01, "time_unit": "ms" }, { @@ -311,9 +311,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2737, - "real_time": 2.5401829891954847e-01, - "cpu_time": 2.5401426963829005e-01, + "iterations": 2740, + "real_time": 2.5533607270378267e-01, + "cpu_time": 2.5533049343065611e-01, "time_unit": "ms" }, { @@ -325,9 +325,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2177, - "real_time": 2.5462905149504739e-01, - "cpu_time": 2.5461684565916515e-01, + "iterations": 2779, + "real_time": 2.5107566979065266e-01, + "cpu_time": 2.5107116228859239e-01, "time_unit": "ms" }, { @@ -339,9 +339,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5097, - "real_time": 1.3730056548799185e-01, - "cpu_time": 1.3729482813419697e-01, + "iterations": 5176, + "real_time": 1.3480490214627530e-01, + "cpu_time": 1.3480058597372524e-01, "time_unit": "ms" } ] diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log b/test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log new file mode 100644 index 00000000..7b1051c1 --- /dev/null +++ b/test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log @@ -0,0 +1,56 @@ +2025-06-01T10:08:40+00:00 +Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) 
+Load Average: 1.01, 1.16, 1.90 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 34.2 ms 34.2 ms 21 +MLIR_Conv2D/1 119 ms 119 ms 6 +Buddy_Conv2D/1 3.91 ms 3.91 ms 179 +Buddy_Corr2D_Constant_Padding/1 7.79 ms 7.79 ms 90 +OpenCV_Filter2D_Constant_Padding/1 5.89 ms 5.89 ms 119 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.142 ms 0.142 ms 4837 +Buddy_Resize2D_Bilinear_Interpolation/1 0.260 ms 0.260 ms 2692 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 105099 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 49521 +Buddy_Erosion2D_Constant_Padding/1 0.214 ms 0.214 ms 3257 +Buddy_Dilation2D_Constant_Padding/1 0.216 ms 0.215 ms 3222 +Buddy_Opening2D_Constant_Padding/1 0.329 ms 0.329 ms 2228 +Buddy_Closing2D_Constant_Padding/1 0.314 ms 0.313 ms 2221 +Buddy_TopHat2D_Constant_Padding/1 0.789 ms 0.789 ms 845 +Buddy_BottomHat2D_Constant_Padding/1 0.793 ms 0.793 ms 825 +OpenCV_Erode2D_Constant_Padding/1 0.137 ms 0.137 ms 5117 +OpenCV_Opening2D_Constant_Padding/1 0.220 ms 0.220 ms 3176 +OpenCV_Closing2D_Constant_Padding/1 0.220 ms 0.220 ms 3179 +OpenCV_TopHat2D_Constant_Padding/1 0.254 ms 0.254 ms 2758 +OpenCV_BottomHat2D_Constant_Padding/1 0.255 ms 0.255 ms 2740 +OpenCV_MorphGrad2D_Constant_Padding/1 0.251 ms 0.251 ms 2779 +OpenCV_Dilate2D_Constant_Padding/1 0.135 ms 0.135 ms 5176 +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. 
diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json old mode 100755 new mode 100644 index 4d296764..6c942dba --- a/test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json +++ b/test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-05-26T22:22:53+00:00", + "date": "2025-06-01T10:09:04+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [2.00537,5.20605,7.47852], + "load_avg": [1.00488,1.14209,1.87793], "library_build_type": "release" }, "benchmarks": [ @@ -46,8 +46,8 @@ "repetition_index": 0, "threads": 1, "iterations": 20, - "real_time": 3.4849253855645657e+01, - "cpu_time": 3.4847690799999995e+01, + "real_time": 3.4315383434295654e+01, + "cpu_time": 3.4314020649999989e+01, "time_unit": "ms" }, { @@ -60,8 +60,8 @@ "repetition_index": 0, "threads": 1, "iterations": 6, - "real_time": 1.2197488794724147e+02, - "cpu_time": 1.2197084233333338e+02, + "real_time": 1.1923250618080299e+02, + "cpu_time": 1.1923025466666671e+02, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 179, - "real_time": 3.8715138948163506e+00, - "cpu_time": 3.8714169888268164e+00, + "iterations": 176, + "real_time": 3.9768879704008047e+00, + "cpu_time": 3.9767473920454539e+00, "time_unit": "ms" }, { @@ -87,9 +87,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 89, - "real_time": 7.8892975292179024e+00, - "cpu_time": 7.8889599438202236e+00, + "iterations": 90, + "real_time": 7.7952975821163921e+00, + "cpu_time": 7.7950874333333360e+00, "time_unit": "ms" }, { @@ -101,9 +101,9 @@ "repetitions": 
1, "repetition_index": 0, "threads": 1, - "iterations": 118, - "real_time": 5.9100432889693870e+00, - "cpu_time": 5.9099251355932196e+00, + "iterations": 119, + "real_time": 5.8853118954335946e+00, + "cpu_time": 5.8850751092436999e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4765, - "real_time": 1.4643552483441571e-01, - "cpu_time": 1.4643220839454354e-01, + "iterations": 4830, + "real_time": 1.4269068127586729e-01, + "cpu_time": 1.4268681076604545e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2629, - "real_time": 2.6632445666353799e-01, - "cpu_time": 2.6630646177253708e-01, + "iterations": 2690, + "real_time": 2.6056179049732958e-01, + "cpu_time": 2.6055384832713774e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 102844, - "real_time": 6.7916700202742914e-03, - "cpu_time": 6.7913482167165796e-03, + "iterations": 105110, + "real_time": 6.6555711116360709e-03, + "cpu_time": 6.6552045476167832e-03, "time_unit": "ms" }, { @@ -157,9 +157,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 48492, - "real_time": 1.4438458006135467e-02, - "cpu_time": 1.4438022684154067e-02, + "iterations": 49449, + "real_time": 1.4160061664736937e-02, + "cpu_time": 1.4159483306032465e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3127, - "real_time": 2.2176959854766587e-01, - "cpu_time": 2.2174655100735502e-01, + "iterations": 3196, + "real_time": 2.1433003843576648e-01, + "cpu_time": 2.1432159981226551e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3155, - "real_time": 2.2122683313493682e-01, - "cpu_time": 2.2121264564183812e-01, + "iterations": 3263, + "real_time": 2.1439825375588656e-01, + "cpu_time": 
2.1438984094391658e-01, "time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2000, - "real_time": 3.4897359274327755e-01, - "cpu_time": 3.4894982949999953e-01, + "iterations": 2208, + "real_time": 3.1279467056383903e-01, + "cpu_time": 3.1279010869565232e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2036, - "real_time": 3.5520158397044788e-01, - "cpu_time": 3.5518303585461614e-01, + "iterations": 2187, + "real_time": 3.2674287674521879e-01, + "cpu_time": 3.2672890352080491e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 751, - "real_time": 9.1041019620416008e-01, - "cpu_time": 9.1037048335552573e-01, + "iterations": 835, + "real_time": 8.0294330007658743e-01, + "cpu_time": 8.0290843592814365e-01, "time_unit": "ms" }, { @@ -241,9 +241,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 755, - "real_time": 9.1234293106375941e-01, - "cpu_time": 9.1229672317880861e-01, + "iterations": 832, + "real_time": 7.9833573321453655e-01, + "cpu_time": 7.9829145552884551e-01, "time_unit": "ms" }, { @@ -255,9 +255,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5001, - "real_time": 1.3972985711104391e-01, - "cpu_time": 1.3972479084183179e-01, + "iterations": 5076, + "real_time": 1.3770647691907706e-01, + "cpu_time": 1.3770291745468863e-01, "time_unit": "ms" }, { @@ -269,9 +269,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3193, - "real_time": 2.2016344559890813e-01, - "cpu_time": 2.2015363670529223e-01, + "iterations": 3051, + "real_time": 2.2880784661720635e-01, + "cpu_time": 2.2880403670927538e-01, "time_unit": "ms" }, { @@ -283,9 +283,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3193, - "real_time": 2.1923410233998053e-01, - "cpu_time": 2.1922637206389020e-01, + 
"iterations": 3037, + "real_time": 2.3033768049011036e-01, + "cpu_time": 2.3032893085281514e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2734, - "real_time": 2.5621622435655922e-01, - "cpu_time": 2.5620427834674475e-01, + "iterations": 2671, + "real_time": 2.6261604532950916e-01, + "cpu_time": 2.6260133545488590e-01, "time_unit": "ms" }, { @@ -311,9 +311,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2725, - "real_time": 2.5520740845881473e-01, - "cpu_time": 2.5519394238532134e-01, + "iterations": 2661, + "real_time": 2.6305766453513613e-01, + "cpu_time": 2.6304995978955348e-01, "time_unit": "ms" }, { @@ -325,9 +325,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2723, - "real_time": 2.5575247593566069e-01, - "cpu_time": 2.5573402240176246e-01, + "iterations": 2719, + "real_time": 2.5722212437300523e-01, + "cpu_time": 2.5721654983449854e-01, "time_unit": "ms" }, { @@ -339,9 +339,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5067, - "real_time": 1.3828342947730279e-01, - "cpu_time": 1.3827946674560934e-01, + "iterations": 5132, + "real_time": 1.3590039066906470e-01, + "cpu_time": 1.3589483982852715e-01, "time_unit": "ms" } ] diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log b/test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log new file mode 100644 index 00000000..86b65056 --- /dev/null +++ b/test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log @@ -0,0 +1,56 @@ +2025-06-01T10:09:04+00:00 +Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 1.00, 1.14, 1.88 +***WARNING*** CPU scaling is 
enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 34.3 ms 34.3 ms 20 +MLIR_Conv2D/1 119 ms 119 ms 6 +Buddy_Conv2D/1 3.98 ms 3.98 ms 176 +Buddy_Corr2D_Constant_Padding/1 7.80 ms 7.80 ms 90 +OpenCV_Filter2D_Constant_Padding/1 5.89 ms 5.89 ms 119 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.143 ms 0.143 ms 4830 +Buddy_Resize2D_Bilinear_Interpolation/1 0.261 ms 0.261 ms 2690 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 105110 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 49449 +Buddy_Erosion2D_Constant_Padding/1 0.214 ms 0.214 ms 3196 +Buddy_Dilation2D_Constant_Padding/1 0.214 ms 0.214 ms 3263 +Buddy_Opening2D_Constant_Padding/1 0.313 ms 0.313 ms 2208 +Buddy_Closing2D_Constant_Padding/1 0.327 ms 0.327 ms 2187 +Buddy_TopHat2D_Constant_Padding/1 0.803 ms 0.803 ms 835 +Buddy_BottomHat2D_Constant_Padding/1 0.798 ms 0.798 ms 832 +OpenCV_Erode2D_Constant_Padding/1 0.138 ms 0.138 ms 5076 +OpenCV_Opening2D_Constant_Padding/1 0.229 ms 0.229 ms 3051 +OpenCV_Closing2D_Constant_Padding/1 0.230 ms 0.230 ms 3037 +OpenCV_TopHat2D_Constant_Padding/1 0.263 ms 0.263 ms 2671 +OpenCV_BottomHat2D_Constant_Padding/1 0.263 ms 0.263 ms 2661 +OpenCV_MorphGrad2D_Constant_Padding/1 0.257 ms 0.257 ms 2719 +OpenCV_Dilate2D_Constant_Padding/1 0.136 ms 0.136 ms 5132 +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. 
diff --git a/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json old mode 100755 new mode 100644 index bcbe8558..7b43959b --- a/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json +++ b/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-05-26T22:17:38+00:00", + "date": "2025-06-01T10:03:44+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [2.48291,11.0342,9.65625], + "load_avg": [1.01025,1.3501,2.23389], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 139, - "real_time": 4.9939627270046758e+00, - "cpu_time": 4.9937224388489208e+00, + "iterations": 148, + "real_time": 4.7447106272384927e+00, + "cpu_time": 4.7446774594594583e+00, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 94, - "real_time": 7.3809593598893350e+00, - "cpu_time": 7.3805415851063847e+00, + "iterations": 97, + "real_time": 7.1993130691272693e+00, + "cpu_time": 7.1992214536082475e+00, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1001, - "real_time": 7.1126366076948166e-01, - "cpu_time": 7.1122309190809208e-01, + "iterations": 994, + "real_time": 7.0498319121613584e-01, + "cpu_time": 7.0496927162977874e-01, "time_unit": "ms" }, { @@ -87,9 +87,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 645, - "real_time": 1.0820790201194526e+00, - "cpu_time": 1.0819991410852714e+00, + "iterations": 652, + "real_time": 1.0683491848911977e+00, + "cpu_time": 
1.0683190337423316e+00, "time_unit": "ms" }, { @@ -101,9 +101,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 367, - "real_time": 1.9048442226664573e+00, - "cpu_time": 1.9047783051771108e+00, + "iterations": 376, + "real_time": 1.8636996540775959e+00, + "cpu_time": 1.8636467952127671e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4717, - "real_time": 1.4679838001146794e-01, - "cpu_time": 1.4678647275810888e-01, + "iterations": 4854, + "real_time": 1.4280247857750419e-01, + "cpu_time": 1.4279905850844668e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2633, - "real_time": 2.6625526897068763e-01, - "cpu_time": 2.6624255070262071e-01, + "iterations": 2692, + "real_time": 2.6015500866020591e-01, + "cpu_time": 2.6014284992570597e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 103033, - "real_time": 6.7974476629531662e-03, - "cpu_time": 6.7971674414993247e-03, + "iterations": 105153, + "real_time": 6.6558956463923079e-03, + "cpu_time": 6.6557461983966244e-03, "time_unit": "ms" }, { @@ -157,9 +157,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 48825, - "real_time": 1.4324477137935753e-02, - "cpu_time": 1.4324028919610843e-02, + "iterations": 49887, + "real_time": 1.4016378462514136e-02, + "cpu_time": 1.4016046244512574e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3139, - "real_time": 2.2281337542475391e-01, - "cpu_time": 2.2280684963364145e-01, + "iterations": 3272, + "real_time": 2.1622067517748964e-01, + "cpu_time": 2.1621826436430350e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3065, - "real_time": 2.2091467243423274e-01, - "cpu_time": 
2.2090241370309904e-01, + "iterations": 3249, + "real_time": 2.1524323226764189e-01, + "cpu_time": 2.1523652816251135e-01, "time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1920, - "real_time": 3.6005476140417159e-01, - "cpu_time": 3.6003958437500022e-01, + "iterations": 2259, + "real_time": 3.1131270476388107e-01, + "cpu_time": 3.1130147410358577e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1938, - "real_time": 3.6397507227765025e-01, - "cpu_time": 3.6396496800825567e-01, + "iterations": 2229, + "real_time": 3.0684158683973023e-01, + "cpu_time": 3.0682693808882905e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 725, - "real_time": 8.9656516138849585e-01, - "cpu_time": 8.9648728137931033e-01, + "iterations": 858, + "real_time": 7.7721029780544604e-01, + "cpu_time": 7.7715368881118829e-01, "time_unit": "ms" }, { @@ -241,9 +241,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 739, - "real_time": 9.1901460866644191e-01, - "cpu_time": 9.1897194046008335e-01, + "iterations": 831, + "real_time": 7.6740273643199863e-01, + "cpu_time": 7.6736787845968901e-01, "time_unit": "ms" }, { @@ -255,9 +255,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4653, - "real_time": 1.5406708547102105e-01, - "cpu_time": 1.5406045841392613e-01, + "iterations": 5114, + "real_time": 1.3575900606825617e-01, + "cpu_time": 1.3575761967149003e-01, "time_unit": "ms" }, { @@ -269,9 +269,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3211, - "real_time": 2.2334867362681299e-01, - "cpu_time": 2.2334181719090618e-01, + "iterations": 3131, + "real_time": 2.2333534568291205e-01, + "cpu_time": 2.2333002331523552e-01, "time_unit": "ms" }, { @@ -283,9 +283,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - 
"iterations": 3106, - "real_time": 2.2087438381052141e-01, - "cpu_time": 2.2086412846104286e-01, + "iterations": 3149, + "real_time": 2.2213427404934885e-01, + "cpu_time": 2.2212822959669751e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2694, - "real_time": 2.5705747035316123e-01, - "cpu_time": 2.5704664179658498e-01, + "iterations": 2744, + "real_time": 2.5495233790780641e-01, + "cpu_time": 2.5493706669096206e-01, "time_unit": "ms" }, { @@ -311,9 +311,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2735, - "real_time": 2.5591517047755680e-01, - "cpu_time": 2.5590764716636138e-01, + "iterations": 2738, + "real_time": 2.5553990242536090e-01, + "cpu_time": 2.5553167786705649e-01, "time_unit": "ms" }, { @@ -325,9 +325,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2736, - "real_time": 2.5600879739054982e-01, - "cpu_time": 2.5599841739765994e-01, + "iterations": 2822, + "real_time": 2.4826389233813398e-01, + "cpu_time": 2.4825289546420984e-01, "time_unit": "ms" }, { @@ -339,9 +339,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4980, - "real_time": 1.4035605851665559e-01, - "cpu_time": 1.4035354417670684e-01, + "iterations": 5112, + "real_time": 1.3681590347222880e-01, + "cpu_time": 1.3681292938184636e-01, "time_unit": "ms" } ] diff --git a/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log b/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log new file mode 100644 index 00000000..e18829e1 --- /dev/null +++ b/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log @@ -0,0 +1,56 @@ +2025-06-01T10:03:44+00:00 +Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB 
(x12) + L3 Unified 30720 KiB (x1) +Load Average: 1.01, 1.35, 2.23 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 4.74 ms 4.74 ms 148 +MLIR_Conv2D/1 7.20 ms 7.20 ms 97 +Buddy_Conv2D/1 0.705 ms 0.705 ms 994 +Buddy_Corr2D_Constant_Padding/1 1.07 ms 1.07 ms 652 +OpenCV_Filter2D_Constant_Padding/1 1.86 ms 1.86 ms 376 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.143 ms 0.143 ms 4854 +Buddy_Resize2D_Bilinear_Interpolation/1 0.260 ms 0.260 ms 2692 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 105153 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 49887 +Buddy_Erosion2D_Constant_Padding/1 0.216 ms 0.216 ms 3272 +Buddy_Dilation2D_Constant_Padding/1 0.215 ms 0.215 ms 3249 +Buddy_Opening2D_Constant_Padding/1 0.311 ms 0.311 ms 2259 +Buddy_Closing2D_Constant_Padding/1 0.307 ms 0.307 ms 2229 +Buddy_TopHat2D_Constant_Padding/1 0.777 ms 0.777 ms 858 +Buddy_BottomHat2D_Constant_Padding/1 0.767 ms 0.767 ms 831 +OpenCV_Erode2D_Constant_Padding/1 0.136 ms 0.136 ms 5114 +OpenCV_Opening2D_Constant_Padding/1 0.223 ms 0.223 ms 3131 +OpenCV_Closing2D_Constant_Padding/1 0.222 ms 0.222 ms 3149 +OpenCV_TopHat2D_Constant_Padding/1 0.255 ms 0.255 ms 2744 +OpenCV_BottomHat2D_Constant_Padding/1 0.256 ms 0.256 ms 2738 +OpenCV_MorphGrad2D_Constant_Padding/1 0.248 ms 0.248 ms 2822 +OpenCV_Dilate2D_Constant_Padding/1 0.137 ms 0.137 ms 5112 +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. 
+Saved PNG file. +Saved PNG file. +Saved PNG file. diff --git a/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json old mode 100755 new mode 100644 index af0b0c54..eeafbc1a --- a/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json +++ b/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-05-26T22:18:02+00:00", + "date": "2025-06-01T10:04:08+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [2.4873,10.48,9.50293], + "load_avg": [1.00537,1.3208,2.19971], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 140, - "real_time": 5.0121187365480830e+00, - "cpu_time": 5.0116892428571438e+00, + "iterations": 144, + "real_time": 4.8768702480528088e+00, + "cpu_time": 4.8767275416666651e+00, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 95, - "real_time": 7.3891957535555486e+00, - "cpu_time": 7.3886794842105266e+00, + "iterations": 97, + "real_time": 7.2076135343804806e+00, + "cpu_time": 7.2073728865979385e+00, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 962, - "real_time": 7.0211241165032756e-01, - "cpu_time": 7.0208425571725575e-01, + "iterations": 988, + "real_time": 7.0670188257568756e-01, + "cpu_time": 7.0667706781376538e-01, "time_unit": "ms" }, { @@ -87,9 +87,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 652, - "real_time": 1.0686190941719556e+00, - "cpu_time": 1.0686101825153378e+00, + "iterations": 668, + 
"real_time": 1.0503804350193746e+00, + "cpu_time": 1.0503256976047903e+00, "time_unit": "ms" }, { @@ -101,9 +101,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 366, - "real_time": 1.9082594459336963e+00, - "cpu_time": 1.9081814426229509e+00, + "iterations": 376, + "real_time": 1.8618261055188610e+00, + "cpu_time": 1.8617787287234049e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4698, - "real_time": 1.4621149407594242e-01, - "cpu_time": 1.4620496956151557e-01, + "iterations": 4847, + "real_time": 1.4253342164861363e-01, + "cpu_time": 1.4253089168557878e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2627, - "real_time": 2.6652376445064901e-01, - "cpu_time": 2.6651557099352852e-01, + "iterations": 2676, + "real_time": 2.6066214687041994e-01, + "cpu_time": 2.6065746524663669e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 102351, - "real_time": 6.8222627678054546e-03, - "cpu_time": 6.8219908061474717e-03, + "iterations": 104914, + "real_time": 6.6579347712314094e-03, + "cpu_time": 6.6577343633833427e-03, "time_unit": "ms" }, { @@ -157,9 +157,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 48814, - "real_time": 1.4326468274316613e-02, - "cpu_time": 1.4325946429303082e-02, + "iterations": 49862, + "real_time": 1.4023264916892000e-02, + "cpu_time": 1.4022960731619289e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3146, - "real_time": 2.2161174837904832e-01, - "cpu_time": 2.2160283598219940e-01, + "iterations": 3188, + "real_time": 2.1308184504378097e-01, + "cpu_time": 2.1307364052697600e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3150, - "real_time": 
2.2120316409402424e-01, - "cpu_time": 2.2118643746031705e-01, + "iterations": 3259, + "real_time": 2.1614176719859540e-01, + "cpu_time": 2.1612986836452897e-01, "time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2128, - "real_time": 3.2950831661210922e-01, - "cpu_time": 3.2949963768797014e-01, + "iterations": 2184, + "real_time": 3.1719380228920080e-01, + "cpu_time": 3.1718530998168509e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2167, - "real_time": 3.3219173261796925e-01, - "cpu_time": 3.3217791832025839e-01, + "iterations": 2136, + "real_time": 3.1429035187967486e-01, + "cpu_time": 3.1427967602996248e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 747, - "real_time": 8.6567999977104793e-01, - "cpu_time": 8.6563935742971920e-01, + "iterations": 814, + "real_time": 7.8621265965831955e-01, + "cpu_time": 7.8619211547911549e-01, "time_unit": "ms" }, { @@ -241,9 +241,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 769, - "real_time": 8.7533015346031040e-01, - "cpu_time": 8.7531097139141834e-01, + "iterations": 847, + "real_time": 7.9931024739072343e-01, + "cpu_time": 7.9924577449822931e-01, "time_unit": "ms" }, { @@ -255,9 +255,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5009, - "real_time": 1.3942049210491378e-01, - "cpu_time": 1.3941283669395094e-01, + "iterations": 5040, + "real_time": 1.3869987608539680e-01, + "cpu_time": 1.3869627519841246e-01, "time_unit": "ms" }, { @@ -269,9 +269,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3227, - "real_time": 2.1610022523952113e-01, - "cpu_time": 2.1609240285094464e-01, + "iterations": 3163, + "real_time": 2.2131044840439565e-01, + "cpu_time": 2.2130652987669977e-01, "time_unit": "ms" }, { @@ -283,9 +283,9 @@ "repetitions": 1, 
"repetition_index": 0, "threads": 1, - "iterations": 3120, - "real_time": 2.2463623064164168e-01, - "cpu_time": 2.2463104391025654e-01, + "iterations": 3197, + "real_time": 2.1893111420844844e-01, + "cpu_time": 2.1892508163903554e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2745, - "real_time": 2.5778625547234479e-01, - "cpu_time": 2.5777660327868818e-01, + "iterations": 2741, + "real_time": 2.5511631760467623e-01, + "cpu_time": 2.5510120357533728e-01, "time_unit": "ms" }, { @@ -311,9 +311,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2765, - "real_time": 2.5453510783464522e-01, - "cpu_time": 2.5452241663652830e-01, + "iterations": 2735, + "real_time": 2.5600316360511116e-01, + "cpu_time": 2.5599668336380238e-01, "time_unit": "ms" }, { @@ -325,9 +325,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2733, - "real_time": 2.5569065993646978e-01, - "cpu_time": 2.5567140065861649e-01, + "iterations": 2817, + "real_time": 2.4800523348552012e-01, + "cpu_time": 2.4799882215122357e-01, "time_unit": "ms" }, { @@ -339,9 +339,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4956, - "real_time": 1.4090421929041450e-01, - "cpu_time": 1.4089905790960416e-01, + "iterations": 5127, + "real_time": 1.3664415676923483e-01, + "cpu_time": 1.3663955119953172e-01, "time_unit": "ms" } ] diff --git a/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log b/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log new file mode 100644 index 00000000..f9f26c26 --- /dev/null +++ b/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log @@ -0,0 +1,56 @@ +2025-06-01T10:04:08+00:00 +Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 
Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 1.01, 1.32, 2.20 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 4.88 ms 4.88 ms 144 +MLIR_Conv2D/1 7.21 ms 7.21 ms 97 +Buddy_Conv2D/1 0.707 ms 0.707 ms 988 +Buddy_Corr2D_Constant_Padding/1 1.05 ms 1.05 ms 668 +OpenCV_Filter2D_Constant_Padding/1 1.86 ms 1.86 ms 376 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.143 ms 0.143 ms 4847 +Buddy_Resize2D_Bilinear_Interpolation/1 0.261 ms 0.261 ms 2676 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 104914 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 49862 +Buddy_Erosion2D_Constant_Padding/1 0.213 ms 0.213 ms 3188 +Buddy_Dilation2D_Constant_Padding/1 0.216 ms 0.216 ms 3259 +Buddy_Opening2D_Constant_Padding/1 0.317 ms 0.317 ms 2184 +Buddy_Closing2D_Constant_Padding/1 0.314 ms 0.314 ms 2136 +Buddy_TopHat2D_Constant_Padding/1 0.786 ms 0.786 ms 814 +Buddy_BottomHat2D_Constant_Padding/1 0.799 ms 0.799 ms 847 +OpenCV_Erode2D_Constant_Padding/1 0.139 ms 0.139 ms 5040 +OpenCV_Opening2D_Constant_Padding/1 0.221 ms 0.221 ms 3163 +OpenCV_Closing2D_Constant_Padding/1 0.219 ms 0.219 ms 3197 +OpenCV_TopHat2D_Constant_Padding/1 0.255 ms 0.255 ms 2741 +OpenCV_BottomHat2D_Constant_Padding/1 0.256 ms 0.256 ms 2735 +OpenCV_MorphGrad2D_Constant_Padding/1 0.248 ms 0.248 ms 2817 +OpenCV_Dilate2D_Constant_Padding/1 0.137 ms 0.137 ms 5127 +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. 
+Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. diff --git a/test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json old mode 100755 new mode 100644 index 67bbc523..3a96a344 --- a/test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json +++ b/test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-05-26T22:18:25+00:00", + "date": "2025-06-01T10:04:31+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [2.2627,9.78223,9.29688], + "load_avg": [1.00342,1.29932,2.17285], "library_build_type": "release" }, "benchmarks": [ @@ -46,8 +46,8 @@ "repetition_index": 0, "threads": 1, "iterations": 60, - "real_time": 1.1918221662441889e+01, - "cpu_time": 1.1917878933333334e+01, + "real_time": 1.1527189146727324e+01, + "cpu_time": 1.1526637466666667e+01, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 23, - "real_time": 2.9796444284527198e+01, - "cpu_time": 2.9795302565217398e+01, + "iterations": 24, + "real_time": 2.9093050320322316e+01, + "cpu_time": 2.9092513625000009e+01, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 332, - "real_time": 2.0893121373850896e+00, - "cpu_time": 2.0892674909638549e+00, + "iterations": 343, + "real_time": 2.0371641713919515e+00, + "cpu_time": 2.0370861661807580e+00, "time_unit": "ms" }, { @@ -87,9 +87,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 390, - "real_time": 1.7985034925051224e+00, - "cpu_time": 1.7984618025641028e+00, + "iterations": 400, + "real_time": 
1.7372224666178226e+00, + "cpu_time": 1.7371669650000010e+00, "time_unit": "ms" }, { @@ -101,9 +101,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 256, - "real_time": 2.7374664350645617e+00, - "cpu_time": 2.7373652890625015e+00, + "iterations": 261, + "real_time": 2.6778692086994420e+00, + "cpu_time": 2.6778029348658996e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4768, - "real_time": 1.4551469881162549e-01, - "cpu_time": 1.4550845364932896e-01, + "iterations": 4858, + "real_time": 1.4252335135278607e-01, + "cpu_time": 1.4251939584191020e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2629, - "real_time": 2.6624438559410463e-01, - "cpu_time": 2.6623375123621162e-01, + "iterations": 2687, + "real_time": 2.6088196868829050e-01, + "cpu_time": 2.6087599590621513e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 103262, - "real_time": 6.7758366610738774e-03, - "cpu_time": 6.7757091766574339e-03, + "iterations": 104992, + "real_time": 6.6678219976420809e-03, + "cpu_time": 6.6676330577567803e-03, "time_unit": "ms" }, { @@ -157,9 +157,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 48878, - "real_time": 1.4318795230307766e-02, - "cpu_time": 1.4318104157289568e-02, + "iterations": 49870, + "real_time": 1.4044699138275052e-02, + "cpu_time": 1.4044414618006824e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3105, - "real_time": 2.2355384296841091e-01, - "cpu_time": 2.2354234396135308e-01, + "iterations": 3259, + "real_time": 2.1626527674653920e-01, + "cpu_time": 2.1625656489720799e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3120, - "real_time": 
2.2285880389599463e-01, - "cpu_time": 2.2284695673076915e-01, + "iterations": 3239, + "real_time": 2.1562600803342119e-01, + "cpu_time": 2.1561170608212396e-01, "time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1986, - "real_time": 3.5211119298098192e-01, - "cpu_time": 3.5210273917421908e-01, + "iterations": 2223, + "real_time": 3.2025049270888573e-01, + "cpu_time": 3.2023937067026542e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1944, - "real_time": 3.4801221027234452e-01, - "cpu_time": 3.4799811882716053e-01, + "iterations": 2211, + "real_time": 3.0800477782292152e-01, + "cpu_time": 3.0799415920398016e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 747, - "real_time": 9.1425062392849521e-01, - "cpu_time": 9.1420195046853980e-01, + "iterations": 836, + "real_time": 7.9134119558705096e-01, + "cpu_time": 7.9132698803827617e-01, "time_unit": "ms" }, { @@ -241,9 +241,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 739, - "real_time": 8.9897179071248301e-01, - "cpu_time": 8.9893609201623792e-01, + "iterations": 841, + "real_time": 8.0551134198726848e-01, + "cpu_time": 8.0547060642092871e-01, "time_unit": "ms" }, { @@ -255,9 +255,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4963, - "real_time": 1.4065765219723869e-01, - "cpu_time": 1.4065358674189021e-01, + "iterations": 5076, + "real_time": 1.3788279668494788e-01, + "cpu_time": 1.3788056402679286e-01, "time_unit": "ms" }, { @@ -269,9 +269,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3142, - "real_time": 2.2345523745352716e-01, - "cpu_time": 2.2343788860598351e-01, + "iterations": 3086, + "real_time": 2.2677640289995318e-01, + "cpu_time": 2.2677006156837323e-01, "time_unit": "ms" }, { @@ -283,9 +283,9 @@ "repetitions": 1, 
"repetition_index": 0, "threads": 1, - "iterations": 3089, - "real_time": 2.2617580448106950e-01, - "cpu_time": 2.2616760051796744e-01, + "iterations": 3114, + "real_time": 2.2484443892767281e-01, + "cpu_time": 2.2483772286448303e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2689, - "real_time": 2.6113948967985257e-01, - "cpu_time": 2.6113230011156607e-01, + "iterations": 2653, + "real_time": 2.6417284279983089e-01, + "cpu_time": 2.6416585978137841e-01, "time_unit": "ms" }, { @@ -311,9 +311,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2693, - "real_time": 2.6151641644468282e-01, - "cpu_time": 2.6150498106201342e-01, + "iterations": 2674, + "real_time": 2.6183896513464922e-01, + "cpu_time": 2.6182971316379983e-01, "time_unit": "ms" }, { @@ -325,9 +325,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2664, - "real_time": 2.6288690386681229e-01, - "cpu_time": 2.6287323873873880e-01, + "iterations": 2741, + "real_time": 2.5503337451225211e-01, + "cpu_time": 2.5502575738781558e-01, "time_unit": "ms" }, { @@ -339,9 +339,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5045, - "real_time": 1.3856769339121486e-01, - "cpu_time": 1.3856448741328012e-01, + "iterations": 5067, + "real_time": 1.3808122899832517e-01, + "cpu_time": 1.3807692401815616e-01, "time_unit": "ms" } ] diff --git a/test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log b/test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log new file mode 100644 index 00000000..62938745 --- /dev/null +++ b/test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log @@ -0,0 +1,56 @@ +2025-06-01T10:04:31+00:00 +Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 
Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 1.00, 1.30, 2.17 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 11.5 ms 11.5 ms 60 +MLIR_Conv2D/1 29.1 ms 29.1 ms 24 +Buddy_Conv2D/1 2.04 ms 2.04 ms 343 +Buddy_Corr2D_Constant_Padding/1 1.74 ms 1.74 ms 400 +OpenCV_Filter2D_Constant_Padding/1 2.68 ms 2.68 ms 261 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.143 ms 0.143 ms 4858 +Buddy_Resize2D_Bilinear_Interpolation/1 0.261 ms 0.261 ms 2687 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 104992 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 49870 +Buddy_Erosion2D_Constant_Padding/1 0.216 ms 0.216 ms 3259 +Buddy_Dilation2D_Constant_Padding/1 0.216 ms 0.216 ms 3239 +Buddy_Opening2D_Constant_Padding/1 0.320 ms 0.320 ms 2223 +Buddy_Closing2D_Constant_Padding/1 0.308 ms 0.308 ms 2211 +Buddy_TopHat2D_Constant_Padding/1 0.791 ms 0.791 ms 836 +Buddy_BottomHat2D_Constant_Padding/1 0.806 ms 0.805 ms 841 +OpenCV_Erode2D_Constant_Padding/1 0.138 ms 0.138 ms 5076 +OpenCV_Opening2D_Constant_Padding/1 0.227 ms 0.227 ms 3086 +OpenCV_Closing2D_Constant_Padding/1 0.225 ms 0.225 ms 3114 +OpenCV_TopHat2D_Constant_Padding/1 0.264 ms 0.264 ms 2653 +OpenCV_BottomHat2D_Constant_Padding/1 0.262 ms 0.262 ms 2674 +OpenCV_MorphGrad2D_Constant_Padding/1 0.255 ms 0.255 ms 2741 +OpenCV_Dilate2D_Constant_Padding/1 0.138 ms 0.138 ms 5067 +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. 
+Saved PNG file. +Saved PNG file. +Saved PNG file. diff --git a/test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json old mode 100755 new mode 100644 index beb2ef45..1f531150 --- a/test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json +++ b/test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-05-26T22:18:50+00:00", + "date": "2025-06-01T10:04:55+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [2.17188,9.15625,9.10205], + "load_avg": [1.05908,1.28955,2.14648], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 60, - "real_time": 1.1642454595615467e+01, - "cpu_time": 1.1642006716666668e+01, + "iterations": 61, + "real_time": 1.1576852715406261e+01, + "cpu_time": 1.1576403065573771e+01, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 23, - "real_time": 2.9861606090613034e+01, - "cpu_time": 2.9859480782608685e+01, + "iterations": 24, + "real_time": 2.8955480316653848e+01, + "cpu_time": 2.8955074333333339e+01, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 327, - "real_time": 2.1290786120049452e+00, - "cpu_time": 2.1289165015290519e+00, + "iterations": 337, + "real_time": 2.0792299345445207e+00, + "cpu_time": 2.0791793204747768e+00, "time_unit": "ms" }, { @@ -87,9 +87,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 389, - "real_time": 1.8013706837797532e+00, - "cpu_time": 1.8012753650385598e+00, + "iterations": 399, + "real_time": 
1.7465614295917047e+00, + "cpu_time": 1.7465200451127822e+00, "time_unit": "ms" }, { @@ -101,9 +101,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 255, - "real_time": 2.7427997367054808e+00, - "cpu_time": 2.7426879019607848e+00, + "iterations": 261, + "real_time": 2.6776142832305698e+00, + "cpu_time": 2.6775001149425290e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4755, - "real_time": 1.4582137870049000e-01, - "cpu_time": 1.4581456950578325e-01, + "iterations": 4856, + "real_time": 1.4251252004180159e-01, + "cpu_time": 1.4250735070016479e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2633, - "real_time": 2.6608296191447867e-01, - "cpu_time": 2.6606331978731484e-01, + "iterations": 2688, + "real_time": 2.6082554818241899e-01, + "cpu_time": 2.6081198883928558e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 102709, - "real_time": 6.8135985735357394e-03, - "cpu_time": 6.8133925848757161e-03, + "iterations": 104682, + "real_time": 6.6886387324274427e-03, + "cpu_time": 6.6884607382358017e-03, "time_unit": "ms" }, { @@ -157,9 +157,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 48854, - "real_time": 1.4308495751672484e-02, - "cpu_time": 1.4308075367421284e-02, + "iterations": 49744, + "real_time": 1.4085311272764809e-02, + "cpu_time": 1.4084703180283075e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2456, - "real_time": 2.3071197049649803e-01, - "cpu_time": 2.3070322231270354e-01, + "iterations": 3240, + "real_time": 2.1877774658302465e-01, + "cpu_time": 2.1876813796296318e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3097, - "real_time": 
2.2570516530413531e-01, - "cpu_time": 2.2569759476913098e-01, + "iterations": 3213, + "real_time": 2.1521158405100488e-01, + "cpu_time": 2.1520321693121730e-01, "time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2015, - "real_time": 3.5169201050295723e-01, - "cpu_time": 3.5164615136476429e-01, + "iterations": 2240, + "real_time": 3.0795027185896678e-01, + "cpu_time": 3.0794031473214295e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2002, - "real_time": 3.4624248832374899e-01, - "cpu_time": 3.4622674325674280e-01, + "iterations": 2269, + "real_time": 3.0903523125381605e-01, + "cpu_time": 3.0902282150727212e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 721, - "real_time": 9.2392744452546605e-01, - "cpu_time": 9.2383990707350849e-01, + "iterations": 841, + "real_time": 8.1986287447062456e-01, + "cpu_time": 8.1985148275862063e-01, "time_unit": "ms" }, { @@ -241,9 +241,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 727, - "real_time": 9.2287308074011276e-01, - "cpu_time": 9.2279450894085369e-01, + "iterations": 846, + "real_time": 8.0027264360135897e-01, + "cpu_time": 8.0021359219858113e-01, "time_unit": "ms" }, { @@ -255,9 +255,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4988, - "real_time": 1.4017799080505597e-01, - "cpu_time": 1.4017390336808308e-01, + "iterations": 5072, + "real_time": 1.3771428277110828e-01, + "cpu_time": 1.3771015950315443e-01, "time_unit": "ms" }, { @@ -269,9 +269,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3142, - "real_time": 2.2201454960506939e-01, - "cpu_time": 2.2200861521323975e-01, + "iterations": 3139, + "real_time": 2.2283822592370409e-01, + "cpu_time": 2.2282934246575339e-01, "time_unit": "ms" }, { @@ -283,9 +283,9 @@ "repetitions": 1, 
"repetition_index": 0, "threads": 1, - "iterations": 3180, - "real_time": 2.2126464934656456e-01, - "cpu_time": 2.2125921069182428e-01, + "iterations": 3074, + "real_time": 2.2787362380739770e-01, + "cpu_time": 2.2786527260897943e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2711, - "real_time": 2.6487032235306335e-01, - "cpu_time": 2.6486294909627450e-01, + "iterations": 2680, + "real_time": 2.6127572115788711e-01, + "cpu_time": 2.6126877089552214e-01, "time_unit": "ms" }, { @@ -311,9 +311,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2642, - "real_time": 2.6148257186293694e-01, - "cpu_time": 2.6147949810749349e-01, + "iterations": 2676, + "real_time": 2.6178410588603562e-01, + "cpu_time": 2.6178113677129977e-01, "time_unit": "ms" }, { @@ -325,9 +325,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2712, - "real_time": 2.5861562781893044e-01, - "cpu_time": 2.5860544026548782e-01, + "iterations": 2755, + "real_time": 2.5373509190909010e-01, + "cpu_time": 2.5373201778584381e-01, "time_unit": "ms" }, { @@ -339,9 +339,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5064, - "real_time": 1.3794987734648478e-01, - "cpu_time": 1.3794665541074216e-01, + "iterations": 5189, + "real_time": 1.3500250305920858e-01, + "cpu_time": 1.3499600346887636e-01, "time_unit": "ms" } ] diff --git a/test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log b/test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log new file mode 100644 index 00000000..60130c33 --- /dev/null +++ b/test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log @@ -0,0 +1,56 @@ +2025-06-01T10:04:55+00:00 +Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + 
L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 1.06, 1.29, 2.15 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 11.6 ms 11.6 ms 61 +MLIR_Conv2D/1 29.0 ms 29.0 ms 24 +Buddy_Conv2D/1 2.08 ms 2.08 ms 337 +Buddy_Corr2D_Constant_Padding/1 1.75 ms 1.75 ms 399 +OpenCV_Filter2D_Constant_Padding/1 2.68 ms 2.68 ms 261 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.143 ms 0.143 ms 4856 +Buddy_Resize2D_Bilinear_Interpolation/1 0.261 ms 0.261 ms 2688 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 104682 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 49744 +Buddy_Erosion2D_Constant_Padding/1 0.219 ms 0.219 ms 3240 +Buddy_Dilation2D_Constant_Padding/1 0.215 ms 0.215 ms 3213 +Buddy_Opening2D_Constant_Padding/1 0.308 ms 0.308 ms 2240 +Buddy_Closing2D_Constant_Padding/1 0.309 ms 0.309 ms 2269 +Buddy_TopHat2D_Constant_Padding/1 0.820 ms 0.820 ms 841 +Buddy_BottomHat2D_Constant_Padding/1 0.800 ms 0.800 ms 846 +OpenCV_Erode2D_Constant_Padding/1 0.138 ms 0.138 ms 5072 +OpenCV_Opening2D_Constant_Padding/1 0.223 ms 0.223 ms 3139 +OpenCV_Closing2D_Constant_Padding/1 0.228 ms 0.228 ms 3074 +OpenCV_TopHat2D_Constant_Padding/1 0.261 ms 0.261 ms 2680 +OpenCV_BottomHat2D_Constant_Padding/1 0.262 ms 0.262 ms 2676 +OpenCV_MorphGrad2D_Constant_Padding/1 0.254 ms 0.254 ms 2755 +OpenCV_Dilate2D_Constant_Padding/1 0.135 ms 0.135 ms 5189 +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. 
+Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. diff --git a/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json old mode 100755 new mode 100644 index a47199eb..0bb23e15 --- a/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json +++ b/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-05-26T22:13:36+00:00", + "date": "2025-06-01T09:59:45+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [30.7622,22.5166,11.9751], + "load_avg": [1.45068,1.74609,2.59521], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 140, - "real_time": 4.9810536737952917e+00, - "cpu_time": 4.9810131785714287e+00, + "iterations": 139, + "real_time": 5.0280020980955031e+00, + "cpu_time": 5.0279279856115116e+00, "time_unit": "ms" }, { @@ -60,8 +60,8 @@ "repetition_index": 0, "threads": 1, "iterations": 95, - "real_time": 7.3597828220379977e+00, - "cpu_time": 7.3595218842105261e+00, + "real_time": 7.3824502919849593e+00, + "cpu_time": 7.3823169052631581e+00, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1585, - "real_time": 4.3800919777963437e-01, - "cpu_time": 4.3799472555205066e-01, + "iterations": 1363, + "real_time": 5.2151041855640645e-01, + "cpu_time": 5.2149413939838585e-01, "time_unit": "ms" }, { @@ -87,9 +87,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 879, - "real_time": 7.9641059736071729e-01, - "cpu_time": 7.9638082935153565e-01, + "iterations": 865, + "real_time": 
8.1360158160587270e-01, + "cpu_time": 8.1358696878612724e-01, "time_unit": "ms" }, { @@ -101,9 +101,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 549, - "real_time": 1.2760020127496217e+00, - "cpu_time": 1.2759357449908932e+00, + "iterations": 548, + "real_time": 1.2772948834637221e+00, + "cpu_time": 1.2772620291970815e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4708, - "real_time": 1.4899489521258649e-01, - "cpu_time": 1.4898712000849618e-01, + "iterations": 4815, + "real_time": 1.4578837239853690e-01, + "cpu_time": 1.4578228888888886e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2618, - "real_time": 2.6733481813993409e-01, - "cpu_time": 2.6732274637127584e-01, + "iterations": 2628, + "real_time": 2.6729297675362462e-01, + "cpu_time": 2.6726695243531190e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 102964, - "real_time": 6.7923858877275613e-03, - "cpu_time": 6.7922180956450830e-03, + "iterations": 102923, + "real_time": 6.8076673491945352e-03, + "cpu_time": 6.8075040564305363e-03, "time_unit": "ms" }, { @@ -157,9 +157,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 48907, - "real_time": 1.4310074157344543e-02, - "cpu_time": 1.4309516122436467e-02, + "iterations": 48919, + "real_time": 1.4305153241039393e-02, + "cpu_time": 1.4304612502299703e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3019, - "real_time": 2.3269172327215998e-01, - "cpu_time": 2.3268162702881737e-01, + "iterations": 3130, + "real_time": 2.2225001761421989e-01, + "cpu_time": 2.2224536613418497e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3004, - "real_time": 
2.3115333293709711e-01, - "cpu_time": 2.3114786617842836e-01, + "iterations": 3158, + "real_time": 2.2036093685297062e-01, + "cpu_time": 2.2035071089297040e-01, "time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2064, - "real_time": 3.3480858939244995e-01, - "cpu_time": 3.3479587936046529e-01, + "iterations": 2177, + "real_time": 3.3364478168434980e-01, + "cpu_time": 3.3363160312356482e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2044, - "real_time": 3.3931921504131735e-01, - "cpu_time": 3.3930748091976470e-01, + "iterations": 2114, + "real_time": 3.3768443804843989e-01, + "cpu_time": 3.3767108656575201e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 782, - "real_time": 8.5598471171944346e-01, - "cpu_time": 8.5594456010230346e-01, + "iterations": 734, + "real_time": 8.9341425146572595e-01, + "cpu_time": 8.9337308855585695e-01, "time_unit": "ms" }, { @@ -241,9 +241,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 791, - "real_time": 8.5073583035267109e-01, - "cpu_time": 8.5071446396965955e-01, + "iterations": 761, + "real_time": 8.9906724402375038e-01, + "cpu_time": 8.9901021944809523e-01, "time_unit": "ms" }, { @@ -255,9 +255,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5012, - "real_time": 1.3971026928861999e-01, - "cpu_time": 1.3970263727055060e-01, + "iterations": 5020, + "real_time": 1.3878680765628815e-01, + "cpu_time": 1.3878371175298815e-01, "time_unit": "ms" }, { @@ -269,9 +269,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3259, - "real_time": 2.1476675100434811e-01, - "cpu_time": 2.1475941423749620e-01, + "iterations": 3210, + "real_time": 2.1697144340317567e-01, + "cpu_time": 2.1696341401869182e-01, "time_unit": "ms" }, { @@ -283,9 +283,9 @@ "repetitions": 1, 
"repetition_index": 0, "threads": 1, - "iterations": 3254, - "real_time": 2.1490678431600169e-01, - "cpu_time": 2.1489986662569091e-01, + "iterations": 3180, + "real_time": 2.2020956864521937e-01, + "cpu_time": 2.2020306729559663e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2816, - "real_time": 2.4862459527370942e-01, - "cpu_time": 2.4861500248579568e-01, + "iterations": 2639, + "real_time": 2.5489669915693824e-01, + "cpu_time": 2.5488007427055692e-01, "time_unit": "ms" }, { @@ -311,9 +311,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2807, - "real_time": 2.4944172902042686e-01, - "cpu_time": 2.4942691806198783e-01, + "iterations": 2732, + "real_time": 2.5621967152917369e-01, + "cpu_time": 2.5621515080527107e-01, "time_unit": "ms" }, { @@ -325,9 +325,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2604, - "real_time": 2.6835452481966965e-01, - "cpu_time": 2.6834275153609888e-01, + "iterations": 2827, + "real_time": 2.4727433998971851e-01, + "cpu_time": 2.4726836151397316e-01, "time_unit": "ms" }, { @@ -339,9 +339,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4985, - "real_time": 1.4054916066714490e-01, - "cpu_time": 1.4054059458375109e-01, + "iterations": 5123, + "real_time": 1.3593631128815267e-01, + "cpu_time": 1.3593265586570380e-01, "time_unit": "ms" } ] diff --git a/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log b/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log new file mode 100644 index 00000000..295916cf --- /dev/null +++ b/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log @@ -0,0 +1,56 @@ +2025-06-01T09:59:45+00:00 +Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 
KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 1.45, 1.75, 2.60 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 5.03 ms 5.03 ms 139 +MLIR_Conv2D/1 7.38 ms 7.38 ms 95 +Buddy_Conv2D/1 0.522 ms 0.521 ms 1363 +Buddy_Corr2D_Constant_Padding/1 0.814 ms 0.814 ms 865 +OpenCV_Filter2D_Constant_Padding/1 1.28 ms 1.28 ms 548 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.146 ms 0.146 ms 4815 +Buddy_Resize2D_Bilinear_Interpolation/1 0.267 ms 0.267 ms 2628 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 102923 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 48919 +Buddy_Erosion2D_Constant_Padding/1 0.222 ms 0.222 ms 3130 +Buddy_Dilation2D_Constant_Padding/1 0.220 ms 0.220 ms 3158 +Buddy_Opening2D_Constant_Padding/1 0.334 ms 0.334 ms 2177 +Buddy_Closing2D_Constant_Padding/1 0.338 ms 0.338 ms 2114 +Buddy_TopHat2D_Constant_Padding/1 0.893 ms 0.893 ms 734 +Buddy_BottomHat2D_Constant_Padding/1 0.899 ms 0.899 ms 761 +OpenCV_Erode2D_Constant_Padding/1 0.139 ms 0.139 ms 5020 +OpenCV_Opening2D_Constant_Padding/1 0.217 ms 0.217 ms 3210 +OpenCV_Closing2D_Constant_Padding/1 0.220 ms 0.220 ms 3180 +OpenCV_TopHat2D_Constant_Padding/1 0.255 ms 0.255 ms 2639 +OpenCV_BottomHat2D_Constant_Padding/1 0.256 ms 0.256 ms 2732 +OpenCV_MorphGrad2D_Constant_Padding/1 0.247 ms 0.247 ms 2827 +OpenCV_Dilate2D_Constant_Padding/1 0.136 ms 0.136 ms 5123 +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. 
+Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. diff --git a/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json old mode 100755 new mode 100644 index 54028421..62d7e98e --- a/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json +++ b/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-05-26T22:14:00+00:00", + "date": "2025-06-01T10:00:09+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [20.9478,20.8682,11.709], + "load_avg": [1.48438,1.73389,2.56738], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 144, - "real_time": 4.8620806774124503e+00, - "cpu_time": 4.8617971249999998e+00, + "iterations": 143, + "real_time": 4.8859805207360871e+00, + "cpu_time": 4.8858233356643366e+00, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 95, - "real_time": 7.3647483399039819e+00, - "cpu_time": 7.3645103473684204e+00, + "iterations": 97, + "real_time": 7.1891580621913533e+00, + "cpu_time": 7.1889744948453602e+00, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1587, - "real_time": 4.4364659274120188e-01, - "cpu_time": 4.4360596030245769e-01, + "iterations": 1337, + "real_time": 5.2432643170221227e-01, + "cpu_time": 5.2429164921465965e-01, "time_unit": "ms" }, { @@ -87,9 +87,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 850, - "real_time": 7.9619941904264335e-01, - "cpu_time": 
7.9616357647058844e-01, + "iterations": 882, + "real_time": 7.9190808553206404e-01, + "cpu_time": 7.9188119614512487e-01, "time_unit": "ms" }, { @@ -101,9 +101,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 549, - "real_time": 1.2764639555322235e+00, - "cpu_time": 1.2764428561020034e+00, + "iterations": 561, + "real_time": 1.2481929319321365e+00, + "cpu_time": 1.2481498698752240e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4759, - "real_time": 1.4590555271648464e-01, - "cpu_time": 1.4590127484765700e-01, + "iterations": 4818, + "real_time": 1.4271947871097246e-01, + "cpu_time": 1.4271616500622658e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2628, - "real_time": 2.6687351161758649e-01, - "cpu_time": 2.6686138660578385e-01, + "iterations": 2683, + "real_time": 2.6050025678440747e-01, + "cpu_time": 2.6049063473723449e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 102952, - "real_time": 6.7868454739183929e-03, - "cpu_time": 6.7866766648535231e-03, + "iterations": 104687, + "real_time": 6.6878359398252953e-03, + "cpu_time": 6.6876859686494008e-03, "time_unit": "ms" }, { @@ -157,9 +157,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 48945, - "real_time": 1.4264975000005026e-02, - "cpu_time": 1.4264539462662167e-02, + "iterations": 49857, + "real_time": 1.4016524784845726e-02, + "cpu_time": 1.4016148605010314e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3245, - "real_time": 2.2328324638915539e-01, - "cpu_time": 2.2327703112480715e-01, + "iterations": 3251, + "real_time": 2.2090763433443072e-01, + "cpu_time": 2.2090116671793322e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, 
- "iterations": 2941, - "real_time": 2.3091911119451655e-01, - "cpu_time": 2.3090739714382824e-01, + "iterations": 3237, + "real_time": 2.1412889112062133e-01, + "cpu_time": 2.1412120729070164e-01, "time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1920, - "real_time": 3.6398599525758374e-01, - "cpu_time": 3.6397573020833335e-01, + "iterations": 2241, + "real_time": 3.2267528754400920e-01, + "cpu_time": 3.2266634225792068e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1942, - "real_time": 3.6292056717078303e-01, - "cpu_time": 3.6289218640576670e-01, + "iterations": 2271, + "real_time": 3.0766681856990435e-01, + "cpu_time": 3.0765447203875002e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 686, - "real_time": 9.5260923092983907e-01, - "cpu_time": 9.5257817346938856e-01, + "iterations": 841, + "real_time": 8.0524559944478280e-01, + "cpu_time": 8.0521395362663450e-01, "time_unit": "ms" }, { @@ -241,9 +241,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 687, - "real_time": 1.0497556183232317e+00, - "cpu_time": 1.0497079097525470e+00, + "iterations": 846, + "real_time": 8.0883008456540162e-01, + "cpu_time": 8.0878250000000096e-01, "time_unit": "ms" }, { @@ -255,9 +255,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4813, - "real_time": 1.4027526283645592e-01, - "cpu_time": 1.4027346166632040e-01, + "iterations": 5105, + "real_time": 1.3598477528097580e-01, + "cpu_time": 1.3598074338883434e-01, "time_unit": "ms" }, { @@ -269,9 +269,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3185, - "real_time": 2.2067454829227232e-01, - "cpu_time": 2.2066997394034502e-01, + "iterations": 3219, + "real_time": 2.1738641869025632e-01, + "cpu_time": 2.1737873221497292e-01, "time_unit": "ms" }, { @@ 
-283,9 +283,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3185, - "real_time": 2.2132610858328863e-01, - "cpu_time": 2.2132252841444267e-01, + "iterations": 3216, + "real_time": 2.1741356276698523e-01, + "cpu_time": 2.1740724844527404e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2747, - "real_time": 2.5434105026101389e-01, - "cpu_time": 2.5433080050964629e-01, + "iterations": 2710, + "real_time": 2.5812379768413812e-01, + "cpu_time": 2.5811859741697379e-01, "time_unit": "ms" }, { @@ -311,9 +311,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2760, - "real_time": 2.5351542601550836e-01, - "cpu_time": 2.5350927717391269e-01, + "iterations": 2740, + "real_time": 2.5566765888981574e-01, + "cpu_time": 2.5566491569343169e-01, "time_unit": "ms" }, { @@ -325,9 +325,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2735, - "real_time": 2.5544877979611563e-01, - "cpu_time": 2.5544034881170063e-01, + "iterations": 2803, + "real_time": 2.4951168754953085e-01, + "cpu_time": 2.4950610916874724e-01, "time_unit": "ms" }, { @@ -339,9 +339,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4913, - "real_time": 1.4221790321921954e-01, - "cpu_time": 1.4221190637085293e-01, + "iterations": 5105, + "real_time": 1.3737050993377151e-01, + "cpu_time": 1.3736774319294784e-01, "time_unit": "ms" } ] diff --git a/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log b/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log new file mode 100644 index 00000000..0e157215 --- /dev/null +++ b/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log @@ -0,0 +1,56 @@ +2025-06-01T10:00:09+00:00 +Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 
Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 1.48, 1.73, 2.57 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 4.89 ms 4.89 ms 143 +MLIR_Conv2D/1 7.19 ms 7.19 ms 97 +Buddy_Conv2D/1 0.524 ms 0.524 ms 1337 +Buddy_Corr2D_Constant_Padding/1 0.792 ms 0.792 ms 882 +OpenCV_Filter2D_Constant_Padding/1 1.25 ms 1.25 ms 561 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.143 ms 0.143 ms 4818 +Buddy_Resize2D_Bilinear_Interpolation/1 0.261 ms 0.260 ms 2683 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 104687 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 49857 +Buddy_Erosion2D_Constant_Padding/1 0.221 ms 0.221 ms 3251 +Buddy_Dilation2D_Constant_Padding/1 0.214 ms 0.214 ms 3237 +Buddy_Opening2D_Constant_Padding/1 0.323 ms 0.323 ms 2241 +Buddy_Closing2D_Constant_Padding/1 0.308 ms 0.308 ms 2271 +Buddy_TopHat2D_Constant_Padding/1 0.805 ms 0.805 ms 841 +Buddy_BottomHat2D_Constant_Padding/1 0.809 ms 0.809 ms 846 +OpenCV_Erode2D_Constant_Padding/1 0.136 ms 0.136 ms 5105 +OpenCV_Opening2D_Constant_Padding/1 0.217 ms 0.217 ms 3219 +OpenCV_Closing2D_Constant_Padding/1 0.217 ms 0.217 ms 3216 +OpenCV_TopHat2D_Constant_Padding/1 0.258 ms 0.258 ms 2710 +OpenCV_BottomHat2D_Constant_Padding/1 0.256 ms 0.256 ms 2740 +OpenCV_MorphGrad2D_Constant_Padding/1 0.250 ms 0.250 ms 2803 +OpenCV_Dilate2D_Constant_Padding/1 0.137 ms 0.137 ms 5105 +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. 
+Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. diff --git a/test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json old mode 100755 new mode 100644 index 7c0b9df1..c1db9850 --- a/test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json +++ b/test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-05-26T22:14:25+00:00", + "date": "2025-06-01T10:00:33+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [14.4819,19.3521,11.4497], + "load_avg": [1.31787,1.67383,2.52441], "library_build_type": "release" }, "benchmarks": [ @@ -46,8 +46,8 @@ "repetition_index": 0, "threads": 1, "iterations": 143, - "real_time": 4.8729841291279223e+00, - "cpu_time": 4.8728229930069924e+00, + "real_time": 4.8869541501040228e+00, + "cpu_time": 4.8868488251748250e+00, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 95, - "real_time": 7.3783556489568012e+00, - "cpu_time": 7.3781441684210529e+00, + "iterations": 97, + "real_time": 7.1941099907319570e+00, + "cpu_time": 7.1940195670103098e+00, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1300, - "real_time": 5.2574137225747108e-01, - "cpu_time": 5.2571169923076932e-01, + "iterations": 1334, + "real_time": 5.2251873713830066e-01, + "cpu_time": 5.2250321664167931e-01, "time_unit": "ms" }, { @@ -87,9 +87,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 860, - "real_time": 8.1764264465417968e-01, - "cpu_time": 8.1761070348837184e-01, 
+ "iterations": 882, + "real_time": 7.9283786223791619e-01, + "cpu_time": 7.9282752607709761e-01, "time_unit": "ms" }, { @@ -101,9 +101,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 548, - "real_time": 1.2776352410768941e+00, - "cpu_time": 1.2775980602189785e+00, + "iterations": 561, + "real_time": 1.2480702330881261e+00, + "cpu_time": 1.2480264402852053e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4746, - "real_time": 1.4604094214156566e-01, - "cpu_time": 1.4603584492203947e-01, + "iterations": 4847, + "real_time": 1.4256667295804781e-01, + "cpu_time": 1.4255895791211062e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2630, - "real_time": 2.6615352260069247e-01, - "cpu_time": 2.6613684258555137e-01, + "iterations": 2679, + "real_time": 2.6033759623287595e-01, + "cpu_time": 2.6032852967525205e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 102577, - "real_time": 6.8162450673133279e-03, - "cpu_time": 6.8160773175273215e-03, + "iterations": 104555, + "real_time": 6.6839088878435033e-03, + "cpu_time": 6.6837356319640398e-03, "time_unit": "ms" }, { @@ -157,9 +157,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 48983, - "real_time": 1.4289135157354995e-02, - "cpu_time": 1.4288656635975739e-02, + "iterations": 49894, + "real_time": 1.4026098685923638e-02, + "cpu_time": 1.4025601475127254e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3160, - "real_time": 2.2105318258363235e-01, - "cpu_time": 2.2104365632911407e-01, + "iterations": 3270, + "real_time": 2.1376049299852565e-01, + "cpu_time": 2.1375075779816491e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3155, - 
"real_time": 2.2091350375000157e-01, - "cpu_time": 2.2090452329635513e-01, + "iterations": 3269, + "real_time": 2.1365958981901981e-01, + "cpu_time": 2.1365636586112000e-01, "time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2114, - "real_time": 4.0858148446512765e-01, - "cpu_time": 4.0855447114474897e-01, + "iterations": 2113, + "real_time": 3.1973698017329782e-01, + "cpu_time": 3.1972719451017506e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1704, - "real_time": 4.0029905443495151e-01, - "cpu_time": 4.0026930692488327e-01, + "iterations": 2219, + "real_time": 3.0630169526152162e-01, + "cpu_time": 3.0629267507886482e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 677, - "real_time": 9.8041003429643159e-01, - "cpu_time": 9.8036545494830007e-01, + "iterations": 863, + "real_time": 7.8096507961062123e-01, + "cpu_time": 7.8092313209733399e-01, "time_unit": "ms" }, { @@ -241,9 +241,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 684, - "real_time": 9.7214487757076296e-01, - "cpu_time": 9.7207104093567231e-01, + "iterations": 836, + "real_time": 7.9488013002076785e-01, + "cpu_time": 7.9486416148325389e-01, "time_unit": "ms" }, { @@ -255,9 +255,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5055, - "real_time": 1.3827483485702474e-01, - "cpu_time": 1.3826953273986148e-01, + "iterations": 5175, + "real_time": 1.3511446327115026e-01, + "cpu_time": 1.3511111072463786e-01, "time_unit": "ms" }, { @@ -269,9 +269,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3286, - "real_time": 2.1324022398151715e-01, - "cpu_time": 2.1323121302495490e-01, + "iterations": 3222, + "real_time": 2.1722390704333303e-01, + "cpu_time": 2.1721783985102436e-01, "time_unit": "ms" }, { @@ -283,9 +283,9 @@ 
"repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3307, - "real_time": 2.1567261159095585e-01, - "cpu_time": 2.1566540973692216e-01, + "iterations": 3153, + "real_time": 2.2210372104423221e-01, + "cpu_time": 2.2209552553123957e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2840, - "real_time": 2.4255320141223116e-01, - "cpu_time": 2.4253837957746441e-01, + "iterations": 2720, + "real_time": 2.5731744244694710e-01, + "cpu_time": 2.5730785477941137e-01, "time_unit": "ms" }, { @@ -311,9 +311,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2838, - "real_time": 2.4312600713955676e-01, - "cpu_time": 2.4311954404510186e-01, + "iterations": 2721, + "real_time": 2.5738097552055383e-01, + "cpu_time": 2.5736759573686152e-01, "time_unit": "ms" }, { @@ -325,9 +325,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2741, - "real_time": 2.5526782384278868e-01, - "cpu_time": 2.5525941590660284e-01, + "iterations": 2805, + "real_time": 2.4959671903740277e-01, + "cpu_time": 2.4958885418894797e-01, "time_unit": "ms" }, { @@ -339,9 +339,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5019, - "real_time": 1.3948601532692595e-01, - "cpu_time": 1.3948021837019295e-01, + "iterations": 5121, + "real_time": 1.3665625601104869e-01, + "cpu_time": 1.3665107752392106e-01, "time_unit": "ms" } ] diff --git a/test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log b/test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log new file mode 100644 index 00000000..ca9c8f3b --- /dev/null +++ b/test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log @@ -0,0 +1,56 @@ +2025-06-01T10:00:33+00:00 +Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + 
L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 1.32, 1.67, 2.52 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 4.89 ms 4.89 ms 143 +MLIR_Conv2D/1 7.19 ms 7.19 ms 97 +Buddy_Conv2D/1 0.523 ms 0.523 ms 1334 +Buddy_Corr2D_Constant_Padding/1 0.793 ms 0.793 ms 882 +OpenCV_Filter2D_Constant_Padding/1 1.25 ms 1.25 ms 561 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.143 ms 0.143 ms 4847 +Buddy_Resize2D_Bilinear_Interpolation/1 0.260 ms 0.260 ms 2679 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 104555 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 49894 +Buddy_Erosion2D_Constant_Padding/1 0.214 ms 0.214 ms 3270 +Buddy_Dilation2D_Constant_Padding/1 0.214 ms 0.214 ms 3269 +Buddy_Opening2D_Constant_Padding/1 0.320 ms 0.320 ms 2113 +Buddy_Closing2D_Constant_Padding/1 0.306 ms 0.306 ms 2219 +Buddy_TopHat2D_Constant_Padding/1 0.781 ms 0.781 ms 863 +Buddy_BottomHat2D_Constant_Padding/1 0.795 ms 0.795 ms 836 +OpenCV_Erode2D_Constant_Padding/1 0.135 ms 0.135 ms 5175 +OpenCV_Opening2D_Constant_Padding/1 0.217 ms 0.217 ms 3222 +OpenCV_Closing2D_Constant_Padding/1 0.222 ms 0.222 ms 3153 +OpenCV_TopHat2D_Constant_Padding/1 0.257 ms 0.257 ms 2720 +OpenCV_BottomHat2D_Constant_Padding/1 0.257 ms 0.257 ms 2721 +OpenCV_MorphGrad2D_Constant_Padding/1 0.250 ms 0.250 ms 2805 +OpenCV_Dilate2D_Constant_Padding/1 0.137 ms 0.137 ms 5121 +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. 
+Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. diff --git a/test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json old mode 100755 new mode 100644 index ca86c2dd..b481f940 --- a/test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json +++ b/test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-05-26T22:14:49+00:00", + "date": "2025-06-01T10:00:56+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [10.2954,17.9736,11.2031], + "load_avg": [1.22656,1.62939,2.49121], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 143, - "real_time": 4.8654593683622931e+00, - "cpu_time": 4.8652642377622382e+00, + "iterations": 144, + "real_time": 4.8671740935080585e+00, + "cpu_time": 4.8670024861111107e+00, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 95, - "real_time": 7.3815446935201949e+00, - "cpu_time": 7.3812387052631561e+00, + "iterations": 97, + "real_time": 7.1850729741386532e+00, + "cpu_time": 7.1848575360824745e+00, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1375, - "real_time": 5.2700068327513605e-01, - "cpu_time": 5.2696759345454547e-01, + "iterations": 1322, + "real_time": 5.2583411916436662e-01, + "cpu_time": 5.2582321558245060e-01, "time_unit": "ms" }, { @@ -87,9 +87,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 857, - "real_time": 8.1594105127464234e-01, - 
"cpu_time": 8.1588588098016346e-01, + "iterations": 885, + "real_time": 7.9312442596686084e-01, + "cpu_time": 7.9308901807909582e-01, "time_unit": "ms" }, { @@ -101,9 +101,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 548, - "real_time": 1.2793068257398414e+00, - "cpu_time": 1.2792433850364955e+00, + "iterations": 560, + "real_time": 1.2487932374434811e+00, + "cpu_time": 1.2487409732142858e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4712, - "real_time": 1.4604800357048500e-01, - "cpu_time": 1.4604121286078095e-01, + "iterations": 4865, + "real_time": 1.4262197755529848e-01, + "cpu_time": 1.4261277903391573e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2626, - "real_time": 2.6662886477524511e-01, - "cpu_time": 2.6661615993907073e-01, + "iterations": 2697, + "real_time": 2.6017889065525024e-01, + "cpu_time": 2.6016967371153132e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 103057, - "real_time": 6.7822204399716401e-03, - "cpu_time": 6.7820833325247156e-03, + "iterations": 105155, + "real_time": 6.6622555692346331e-03, + "cpu_time": 6.6618292710760377e-03, "time_unit": "ms" }, { @@ -157,9 +157,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 48615, - "real_time": 1.4394099605643772e-02, - "cpu_time": 1.4393704720765189e-02, + "iterations": 49883, + "real_time": 1.4039483654351604e-02, + "cpu_time": 1.4039219974740884e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3174, - "real_time": 2.2084131912007299e-01, - "cpu_time": 2.2083239823566458e-01, + "iterations": 3259, + "real_time": 2.1370667615581340e-01, + "cpu_time": 2.1369622583614614e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 1, "repetition_index": 0, 
"threads": 1, - "iterations": 3168, - "real_time": 2.2007557951534787e-01, - "cpu_time": 2.2006410353535388e-01, + "iterations": 3259, + "real_time": 2.1333487355186004e-01, + "cpu_time": 2.1332873366063201e-01, "time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2081, - "real_time": 3.2926392900376639e-01, - "cpu_time": 3.2925050552618995e-01, + "iterations": 2258, + "real_time": 3.1496402605430968e-01, + "cpu_time": 3.1494664968999142e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2123, - "real_time": 3.2805886033622378e-01, - "cpu_time": 3.2802363306641535e-01, + "iterations": 2255, + "real_time": 3.1185760482848351e-01, + "cpu_time": 3.1184659645232815e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 787, - "real_time": 8.8516553198821668e-01, - "cpu_time": 8.8512480304955576e-01, + "iterations": 854, + "real_time": 7.8316566161375134e-01, + "cpu_time": 7.8311624238875910e-01, "time_unit": "ms" }, { @@ -241,9 +241,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 762, - "real_time": 8.6372281886695879e-01, - "cpu_time": 8.6365337926509200e-01, + "iterations": 821, + "real_time": 7.8512378599626287e-01, + "cpu_time": 7.8508044214372708e-01, "time_unit": "ms" }, { @@ -255,9 +255,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5037, - "real_time": 1.3910725835547316e-01, - "cpu_time": 1.3910415386142530e-01, + "iterations": 5119, + "real_time": 1.3652631023603623e-01, + "cpu_time": 1.3651533834733337e-01, "time_unit": "ms" }, { @@ -269,9 +269,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3118, - "real_time": 2.2313356545297972e-01, - "cpu_time": 2.2312385439384252e-01, + "iterations": 3167, + "real_time": 2.2093217465867745e-01, + "cpu_time": 2.2092883517524411e-01, "time_unit": 
"ms" }, { @@ -283,9 +283,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3165, - "real_time": 2.2287553685347994e-01, - "cpu_time": 2.2286821358609735e-01, + "iterations": 3139, + "real_time": 2.2298171546627055e-01, + "cpu_time": 2.2297460560688059e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2680, - "real_time": 2.5539524547422110e-01, - "cpu_time": 2.5538522126865731e-01, + "iterations": 2712, + "real_time": 2.5806632675889846e-01, + "cpu_time": 2.5805932227138600e-01, "time_unit": "ms" }, { @@ -311,9 +311,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2756, - "real_time": 2.5834521908632035e-01, - "cpu_time": 2.5833924056603741e-01, + "iterations": 2733, + "real_time": 2.5592657863177792e-01, + "cpu_time": 2.5591782729601126e-01, "time_unit": "ms" }, { @@ -325,9 +325,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2733, - "real_time": 2.8245730964181365e-01, - "cpu_time": 2.8244755031101304e-01, + "iterations": 2804, + "real_time": 2.4944995018452959e-01, + "cpu_time": 2.4944712589158421e-01, "time_unit": "ms" }, { @@ -339,9 +339,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4846, - "real_time": 1.4001663829600866e-01, - "cpu_time": 1.4001249938093291e-01, + "iterations": 5118, + "real_time": 1.3672088059236781e-01, + "cpu_time": 1.3671753341148860e-01, "time_unit": "ms" } ] diff --git a/test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log b/test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log new file mode 100644 index 00000000..ff32d637 --- /dev/null +++ b/test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log @@ -0,0 +1,56 @@ +2025-06-01T10:00:56+00:00 +Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU 
Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 1.23, 1.63, 2.49 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 4.87 ms 4.87 ms 144 +MLIR_Conv2D/1 7.19 ms 7.18 ms 97 +Buddy_Conv2D/1 0.526 ms 0.526 ms 1322 +Buddy_Corr2D_Constant_Padding/1 0.793 ms 0.793 ms 885 +OpenCV_Filter2D_Constant_Padding/1 1.25 ms 1.25 ms 560 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.143 ms 0.143 ms 4865 +Buddy_Resize2D_Bilinear_Interpolation/1 0.260 ms 0.260 ms 2697 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 105155 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 49883 +Buddy_Erosion2D_Constant_Padding/1 0.214 ms 0.214 ms 3259 +Buddy_Dilation2D_Constant_Padding/1 0.213 ms 0.213 ms 3259 +Buddy_Opening2D_Constant_Padding/1 0.315 ms 0.315 ms 2258 +Buddy_Closing2D_Constant_Padding/1 0.312 ms 0.312 ms 2255 +Buddy_TopHat2D_Constant_Padding/1 0.783 ms 0.783 ms 854 +Buddy_BottomHat2D_Constant_Padding/1 0.785 ms 0.785 ms 821 +OpenCV_Erode2D_Constant_Padding/1 0.137 ms 0.137 ms 5119 +OpenCV_Opening2D_Constant_Padding/1 0.221 ms 0.221 ms 3167 +OpenCV_Closing2D_Constant_Padding/1 0.223 ms 0.223 ms 3139 +OpenCV_TopHat2D_Constant_Padding/1 0.258 ms 0.258 ms 2712 +OpenCV_BottomHat2D_Constant_Padding/1 0.256 ms 0.256 ms 2733 +OpenCV_MorphGrad2D_Constant_Padding/1 0.249 ms 0.249 ms 2804 +OpenCV_Dilate2D_Constant_Padding/1 0.137 ms 0.137 ms 5118 +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. 
+Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. diff --git a/test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json old mode 100755 new mode 100644 index a8cd749b..0c93522f --- a/test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json +++ b/test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-05-26T22:15:13+00:00", + "date": "2025-06-01T10:01:20+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [7.93994,16.9375,11.0059], + "load_avg": [1.14795,1.57764,2.44971], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 59, - "real_time": 1.1848425839917134e+01, - "cpu_time": 1.1847907694915255e+01, + "iterations": 61, + "real_time": 1.1490864648682173e+01, + "cpu_time": 1.1490722819672131e+01, "time_unit": "ms" }, { @@ -60,8 +60,8 @@ "repetition_index": 0, "threads": 1, "iterations": 24, - "real_time": 2.9868476558476686e+01, - "cpu_time": 2.9867346333333341e+01, + "real_time": 2.8909380702922743e+01, + "cpu_time": 2.8909021499999994e+01, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 225, - "real_time": 3.1161748617887497e+00, - "cpu_time": 3.1160428266666669e+00, + "iterations": 231, + "real_time": 3.0301709405400534e+00, + "cpu_time": 3.0300729220779217e+00, "time_unit": "ms" }, { @@ -87,9 +87,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 299, - "real_time": 2.3595800803955580e+00, - "cpu_time": 
2.3594202575250853e+00, + "iterations": 303, + "real_time": 2.3071401731015824e+00, + "cpu_time": 2.3070868151815174e+00, "time_unit": "ms" }, { @@ -101,9 +101,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 167, - "real_time": 4.1960542631184987e+00, - "cpu_time": 4.1958419520958117e+00, + "iterations": 171, + "real_time": 4.1065351351311330e+00, + "cpu_time": 4.1064032222222222e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4718, - "real_time": 1.4594548325409268e-01, - "cpu_time": 1.4594251165748195e-01, + "iterations": 4848, + "real_time": 1.4249105528980591e-01, + "cpu_time": 1.4248396947194716e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2624, - "real_time": 2.6661387244353024e-01, - "cpu_time": 2.6660572522865866e-01, + "iterations": 2686, + "real_time": 2.6072073888729869e-01, + "cpu_time": 2.6071289240506312e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 102278, - "real_time": 6.8319474596379914e-03, - "cpu_time": 6.8317058311660401e-03, + "iterations": 105211, + "real_time": 6.6560155957683153e-03, + "cpu_time": 6.6558318331733325e-03, "time_unit": "ms" }, { @@ -157,9 +157,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 47123, - "real_time": 1.4845184285984307e-02, - "cpu_time": 1.4844183286293328e-02, + "iterations": 49990, + "real_time": 1.4024660934696533e-02, + "cpu_time": 1.4024203120624117e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3147, - "real_time": 2.2381313694424959e-01, - "cpu_time": 2.2379139084842695e-01, + "iterations": 3269, + "real_time": 2.1371879390107268e-01, + "cpu_time": 2.1370936004894461e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, 
- "iterations": 3121, - "real_time": 2.2129215495836960e-01, - "cpu_time": 2.2128436654918285e-01, + "iterations": 3239, + "real_time": 2.1510430982301323e-01, + "cpu_time": 2.1509574992281527e-01, "time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2029, - "real_time": 3.4534043006822945e-01, - "cpu_time": 3.4533251355347439e-01, + "iterations": 2276, + "real_time": 3.1433540915248681e-01, + "cpu_time": 3.1432626493848848e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2088, - "real_time": 3.3967797929899218e-01, - "cpu_time": 3.3966568295019134e-01, + "iterations": 2257, + "real_time": 3.0752330173366516e-01, + "cpu_time": 3.0751668187859921e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 755, - "real_time": 9.0232322498267847e-01, - "cpu_time": 9.0227498278145613e-01, + "iterations": 840, + "real_time": 8.0359440429934437e-01, + "cpu_time": 8.0356525238095189e-01, "time_unit": "ms" }, { @@ -241,9 +241,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 740, - "real_time": 9.0346561140708026e-01, - "cpu_time": 9.0344697432432508e-01, + "iterations": 836, + "real_time": 7.8342987545298048e-01, + "cpu_time": 7.8342100717703167e-01, "time_unit": "ms" }, { @@ -255,9 +255,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5077, - "real_time": 1.3789620528557836e-01, - "cpu_time": 1.3789097912152867e-01, + "iterations": 5131, + "real_time": 1.3636051011210970e-01, + "cpu_time": 1.3635655388813078e-01, "time_unit": "ms" }, { @@ -269,9 +269,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3073, - "real_time": 2.2964896206897670e-01, - "cpu_time": 2.2963901692157565e-01, + "iterations": 3120, + "real_time": 2.2432351461014685e-01, + "cpu_time": 2.2431598076923051e-01, "time_unit": "ms" }, { @@ 
-283,9 +283,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3064, - "real_time": 2.2711840156689481e-01, - "cpu_time": 2.2711246866840784e-01, + "iterations": 3078, + "real_time": 2.2727604580243119e-01, + "cpu_time": 2.2726826185834917e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2696, - "real_time": 2.6035577042210706e-01, - "cpu_time": 2.6034106676557928e-01, + "iterations": 2688, + "real_time": 2.6022881640875267e-01, + "cpu_time": 2.6022399479166697e-01, "time_unit": "ms" }, { @@ -311,9 +311,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2699, - "real_time": 2.5962485761698106e-01, - "cpu_time": 2.5961401185624283e-01, + "iterations": 2702, + "real_time": 2.5923438659758147e-01, + "cpu_time": 2.5922455366395236e-01, "time_unit": "ms" }, { @@ -325,9 +325,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2689, - "real_time": 2.6031346023969376e-01, - "cpu_time": 2.6029616586091403e-01, + "iterations": 2764, + "real_time": 2.5320007343130935e-01, + "cpu_time": 2.5319149674384939e-01, "time_unit": "ms" }, { @@ -339,9 +339,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5007, - "real_time": 1.3964356222516028e-01, - "cpu_time": 1.3963851967245872e-01, + "iterations": 5022, + "real_time": 1.3919629242637632e-01, + "cpu_time": 1.3919045081640841e-01, "time_unit": "ms" } ] diff --git a/test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log b/test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log new file mode 100644 index 00000000..a5546517 --- /dev/null +++ b/test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log @@ -0,0 +1,56 @@ +2025-06-01T10:01:20+00:00 +Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 
Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 1.15, 1.58, 2.45 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 11.5 ms 11.5 ms 61 +MLIR_Conv2D/1 28.9 ms 28.9 ms 24 +Buddy_Conv2D/1 3.03 ms 3.03 ms 231 +Buddy_Corr2D_Constant_Padding/1 2.31 ms 2.31 ms 303 +OpenCV_Filter2D_Constant_Padding/1 4.11 ms 4.11 ms 171 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.142 ms 0.142 ms 4848 +Buddy_Resize2D_Bilinear_Interpolation/1 0.261 ms 0.261 ms 2686 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 105211 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 49990 +Buddy_Erosion2D_Constant_Padding/1 0.214 ms 0.214 ms 3269 +Buddy_Dilation2D_Constant_Padding/1 0.215 ms 0.215 ms 3239 +Buddy_Opening2D_Constant_Padding/1 0.314 ms 0.314 ms 2276 +Buddy_Closing2D_Constant_Padding/1 0.308 ms 0.308 ms 2257 +Buddy_TopHat2D_Constant_Padding/1 0.804 ms 0.804 ms 840 +Buddy_BottomHat2D_Constant_Padding/1 0.783 ms 0.783 ms 836 +OpenCV_Erode2D_Constant_Padding/1 0.136 ms 0.136 ms 5131 +OpenCV_Opening2D_Constant_Padding/1 0.224 ms 0.224 ms 3120 +OpenCV_Closing2D_Constant_Padding/1 0.227 ms 0.227 ms 3078 +OpenCV_TopHat2D_Constant_Padding/1 0.260 ms 0.260 ms 2688 +OpenCV_BottomHat2D_Constant_Padding/1 0.259 ms 0.259 ms 2702 +OpenCV_MorphGrad2D_Constant_Padding/1 0.253 ms 0.253 ms 2764 +OpenCV_Dilate2D_Constant_Padding/1 0.139 ms 0.139 ms 5022 +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. 
+Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. diff --git a/test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json old mode 100755 new mode 100644 index ccd6e407..5ce9a83a --- a/test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json +++ b/test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-05-26T22:15:37+00:00", + "date": "2025-06-01T10:01:44+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [5.91162,15.7373,10.7656], + "load_avg": [1.09619,1.53027,2.41064], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 59, - "real_time": 1.1816273490756245e+01, - "cpu_time": 1.1815782491525423e+01, + "iterations": 61, + "real_time": 1.1557955600199152e+01, + "cpu_time": 1.1557535721311476e+01, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 23, - "real_time": 2.9844296529241230e+01, - "cpu_time": 2.9842673869565211e+01, + "iterations": 24, + "real_time": 2.8690188502271969e+01, + "cpu_time": 2.8689408208333333e+01, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 225, - "real_time": 3.1147927790880203e+00, - "cpu_time": 3.1147311333333345e+00, + "iterations": 231, + "real_time": 3.0273355350091862e+00, + "cpu_time": 3.0272740259740258e+00, "time_unit": "ms" }, { @@ -87,9 +87,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 297, - "real_time": 
2.3587474267089408e+00, - "cpu_time": 2.3585573636363653e+00, + "iterations": 302, + "real_time": 2.3065690901007083e+00, + "cpu_time": 2.3065092152317894e+00, "time_unit": "ms" }, { @@ -101,9 +101,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 165, - "real_time": 4.2047196716973279e+00, - "cpu_time": 4.2045767636363633e+00, + "iterations": 170, + "real_time": 4.1036540971082802e+00, + "cpu_time": 4.1035620235294115e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4749, - "real_time": 1.4566486533892031e-01, - "cpu_time": 1.4565762518424935e-01, + "iterations": 4835, + "real_time": 1.4248023227612261e-01, + "cpu_time": 1.4247486577042395e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2628, - "real_time": 2.6631513128519241e-01, - "cpu_time": 2.6630184817351604e-01, + "iterations": 2693, + "real_time": 2.5990511426640583e-01, + "cpu_time": 2.5989337541774982e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 102987, - "real_time": 6.7866966045152712e-03, - "cpu_time": 6.7864467457057695e-03, + "iterations": 104962, + "real_time": 6.6757263726710285e-03, + "cpu_time": 6.6756417274823291e-03, "time_unit": "ms" }, { @@ -157,9 +157,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 48924, - "real_time": 1.4303465876443516e-02, - "cpu_time": 1.4303076138500528e-02, + "iterations": 49843, + "real_time": 1.4048672123858236e-02, + "cpu_time": 1.4048335934835362e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3176, - "real_time": 2.2159935789234392e-01, - "cpu_time": 2.2158961429471016e-01, + "iterations": 3259, + "real_time": 2.1453584358896277e-01, + "cpu_time": 2.1452855630561513e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 
1, "repetition_index": 0, "threads": 1, - "iterations": 3152, - "real_time": 2.2156847473044838e-01, - "cpu_time": 2.2155957265228407e-01, + "iterations": 3254, + "real_time": 2.1427505911220857e-01, + "cpu_time": 2.1427030762138938e-01, "time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1998, - "real_time": 3.5863460225236787e-01, - "cpu_time": 3.5861286786786817e-01, + "iterations": 2262, + "real_time": 3.0703992178543388e-01, + "cpu_time": 3.0703200707338635e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1943, - "real_time": 3.3647872074265117e-01, - "cpu_time": 3.3646851621204243e-01, + "iterations": 2241, + "real_time": 3.1868767929178221e-01, + "cpu_time": 3.1867821329763490e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 750, - "real_time": 9.0944948295752204e-01, - "cpu_time": 9.0935812666666738e-01, + "iterations": 851, + "real_time": 7.8355771140332231e-01, + "cpu_time": 7.8351834195064463e-01, "time_unit": "ms" }, { @@ -241,9 +241,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 737, - "real_time": 9.1223105323201448e-01, - "cpu_time": 9.1217854274084309e-01, + "iterations": 840, + "real_time": 7.7708095666908084e-01, + "cpu_time": 7.7707133690476238e-01, "time_unit": "ms" }, { @@ -255,9 +255,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5030, - "real_time": 1.3899165307491959e-01, - "cpu_time": 1.3898452544731602e-01, + "iterations": 5192, + "real_time": 1.3475235621911261e-01, + "cpu_time": 1.3474997862095520e-01, "time_unit": "ms" }, { @@ -269,9 +269,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3017, - "real_time": 2.2983206278605298e-01, - "cpu_time": 2.2982459032151112e-01, + "iterations": 3030, + "real_time": 2.3070538643956578e-01, + "cpu_time": 
2.3069644389438895e-01, "time_unit": "ms" }, { @@ -283,9 +283,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3170, - "real_time": 2.2272756176322039e-01, - "cpu_time": 2.2271838801261890e-01, + "iterations": 3053, + "real_time": 2.2915481686435915e-01, + "cpu_time": 2.2914178480183464e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2657, - "real_time": 2.6656502677638771e-01, - "cpu_time": 2.6655630447873591e-01, + "iterations": 2609, + "real_time": 2.6790612302620598e-01, + "cpu_time": 2.6789994863932470e-01, "time_unit": "ms" }, { @@ -311,9 +311,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2690, - "real_time": 2.6213287905578719e-01, - "cpu_time": 2.6212498327137529e-01, + "iterations": 2624, + "real_time": 2.6675567340401068e-01, + "cpu_time": 2.6674890320121936e-01, "time_unit": "ms" }, { @@ -325,9 +325,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2671, - "real_time": 2.6169095993131253e-01, - "cpu_time": 2.6167889779108955e-01, + "iterations": 2714, + "real_time": 2.5794556255980106e-01, + "cpu_time": 2.5793842372881365e-01, "time_unit": "ms" }, { @@ -339,9 +339,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4878, - "real_time": 1.4317088713468515e-01, - "cpu_time": 1.4316560619106178e-01, + "iterations": 5119, + "real_time": 1.3671573276956650e-01, + "cpu_time": 1.3671077788630612e-01, "time_unit": "ms" } ] diff --git a/test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log b/test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log new file mode 100644 index 00000000..7365203e --- /dev/null +++ b/test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log @@ -0,0 +1,56 @@ +2025-06-01T10:01:44+00:00 +Running 
./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 1.10, 1.53, 2.41 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 11.6 ms 11.6 ms 61 +MLIR_Conv2D/1 28.7 ms 28.7 ms 24 +Buddy_Conv2D/1 3.03 ms 3.03 ms 231 +Buddy_Corr2D_Constant_Padding/1 2.31 ms 2.31 ms 302 +OpenCV_Filter2D_Constant_Padding/1 4.10 ms 4.10 ms 170 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.142 ms 0.142 ms 4835 +Buddy_Resize2D_Bilinear_Interpolation/1 0.260 ms 0.260 ms 2693 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 104962 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 49843 +Buddy_Erosion2D_Constant_Padding/1 0.215 ms 0.215 ms 3259 +Buddy_Dilation2D_Constant_Padding/1 0.214 ms 0.214 ms 3254 +Buddy_Opening2D_Constant_Padding/1 0.307 ms 0.307 ms 2262 +Buddy_Closing2D_Constant_Padding/1 0.319 ms 0.319 ms 2241 +Buddy_TopHat2D_Constant_Padding/1 0.784 ms 0.784 ms 851 +Buddy_BottomHat2D_Constant_Padding/1 0.777 ms 0.777 ms 840 +OpenCV_Erode2D_Constant_Padding/1 0.135 ms 0.135 ms 5192 +OpenCV_Opening2D_Constant_Padding/1 0.231 ms 0.231 ms 3030 +OpenCV_Closing2D_Constant_Padding/1 0.229 ms 0.229 ms 3053 +OpenCV_TopHat2D_Constant_Padding/1 0.268 ms 0.268 ms 2609 +OpenCV_BottomHat2D_Constant_Padding/1 0.267 ms 0.267 ms 2624 +OpenCV_MorphGrad2D_Constant_Padding/1 0.258 ms 0.258 ms 2714 +OpenCV_Dilate2D_Constant_Padding/1 0.137 ms 0.137 ms 5119 +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. 
+Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. diff --git a/test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json old mode 100755 new mode 100644 index d36004ad..ddaa93a1 --- a/test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json +++ b/test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-05-26T22:16:02+00:00", + "date": "2025-06-01T10:02:08+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [4.57568,14.6328,10.5317], + "load_avg": [1.06201,1.48633,2.37158], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 32, - "real_time": 2.1973324648570269e+01, - "cpu_time": 2.1972224593749999e+01, + "iterations": 31, + "real_time": 2.1583555506602412e+01, + "cpu_time": 2.1583274354838714e+01, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 10, - "real_time": 6.8125827610492706e+01, - "cpu_time": 6.8118218799999994e+01, + "iterations": 11, + "real_time": 6.6695000637661323e+01, + "cpu_time": 6.6693548545454576e+01, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 109, - "real_time": 6.3353857931193955e+00, - "cpu_time": 6.3351724220183510e+00, + "iterations": 114, + "real_time": 6.1205455118365455e+00, + "cpu_time": 6.1202609210526315e+00, "time_unit": "ms" }, { @@ -87,9 +87,9 @@ "repetitions": 1, "repetition_index": 0, 
"threads": 1, - "iterations": 148, - "real_time": 4.7007313224713547e+00, - "cpu_time": 4.7003578243243220e+00, + "iterations": 151, + "real_time": 4.6537321504970262e+00, + "cpu_time": 4.6535720132450340e+00, "time_unit": "ms" }, { @@ -101,9 +101,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 79, - "real_time": 8.8025724538896650e+00, - "cpu_time": 8.8023394810126590e+00, + "iterations": 81, + "real_time": 8.5981621142522791e+00, + "cpu_time": 8.5979825679012354e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4771, - "real_time": 1.4587134018218684e-01, - "cpu_time": 1.4586479689792500e-01, + "iterations": 4847, + "real_time": 1.4300990679877945e-01, + "cpu_time": 1.4300421848566114e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2626, - "real_time": 2.6619548271697435e-01, - "cpu_time": 2.6618333244478276e-01, + "iterations": 2686, + "real_time": 2.6083683521554085e-01, + "cpu_time": 2.6083052643335813e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 103087, - "real_time": 6.7907857617304848e-03, - "cpu_time": 6.7906139959451753e-03, + "iterations": 105138, + "real_time": 6.6629409044258181e-03, + "cpu_time": 6.6626790979474621e-03, "time_unit": "ms" }, { @@ -157,9 +157,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 48675, - "real_time": 1.4325042925373248e-02, - "cpu_time": 1.4324671186440667e-02, + "iterations": 49889, + "real_time": 1.4021988875160225e-02, + "cpu_time": 1.4021621239150914e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3188, - "real_time": 2.1997994744456098e-01, - "cpu_time": 2.1996185319949818e-01, + "iterations": 3257, + "real_time": 2.1324698299416597e-01, + "cpu_time": 2.1323381915873477e-01, 
"time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3182, - "real_time": 2.2068286009043886e-01, - "cpu_time": 2.2067045663104931e-01, + "iterations": 3242, + "real_time": 2.1325541403946649e-01, + "cpu_time": 2.1324987754472560e-01, "time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2146, - "real_time": 3.3393757676450536e-01, - "cpu_time": 3.3392018080149133e-01, + "iterations": 2237, + "real_time": 3.1306709849413450e-01, + "cpu_time": 3.1305870138578440e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2142, - "real_time": 3.9424579399250914e-01, - "cpu_time": 3.9422578664799229e-01, + "iterations": 2232, + "real_time": 3.1772318903663893e-01, + "cpu_time": 3.1771430913978527e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 745, - "real_time": 9.0135914967364117e-01, - "cpu_time": 9.0132612617449748e-01, + "iterations": 853, + "real_time": 7.7462966301656411e-01, + "cpu_time": 7.7457911137162871e-01, "time_unit": "ms" }, { @@ -241,9 +241,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 722, - "real_time": 9.1764490556106015e-01, - "cpu_time": 9.1758207894736721e-01, + "iterations": 846, + "real_time": 7.8768119761988908e-01, + "cpu_time": 7.8767234278959741e-01, "time_unit": "ms" }, { @@ -255,9 +255,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5046, - "real_time": 1.3835566303664334e-01, - "cpu_time": 1.3834791062227497e-01, + "iterations": 5075, + "real_time": 1.3787841451872745e-01, + "cpu_time": 1.3787315527093585e-01, "time_unit": "ms" }, { @@ -269,9 +269,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3129, - "real_time": 2.2442531306147767e-01, - "cpu_time": 2.2441539757110901e-01, + "iterations": 3015, + 
"real_time": 2.3217374029543073e-01, + "cpu_time": 2.3216454626865654e-01, "time_unit": "ms" }, { @@ -283,9 +283,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3078, - "real_time": 2.2707124742364015e-01, - "cpu_time": 2.2706264035087806e-01, + "iterations": 3114, + "real_time": 2.2473948060815427e-01, + "cpu_time": 2.2473018657675037e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2660, - "real_time": 2.6315047818803250e-01, - "cpu_time": 2.6313447218045083e-01, + "iterations": 2647, + "real_time": 2.6444323265219255e-01, + "cpu_time": 2.6443637363052508e-01, "time_unit": "ms" }, { @@ -311,9 +311,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2723, - "real_time": 2.5931872657301491e-01, - "cpu_time": 2.5930012449504203e-01, + "iterations": 2672, + "real_time": 2.6206671633941686e-01, + "cpu_time": 2.6205630389221563e-01, "time_unit": "ms" }, { @@ -325,9 +325,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2685, - "real_time": 2.6018807744291683e-01, - "cpu_time": 2.6018054525139650e-01, + "iterations": 2749, + "real_time": 2.5497903798701071e-01, + "cpu_time": 2.5497087522735551e-01, "time_unit": "ms" }, { @@ -339,9 +339,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4978, - "real_time": 1.4037757216926175e-01, - "cpu_time": 1.4037138931297666e-01, + "iterations": 5111, + "real_time": 1.3701243949622191e-01, + "cpu_time": 1.3700826589708445e-01, "time_unit": "ms" } ] diff --git a/test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log b/test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log new file mode 100644 index 00000000..18a4d5c0 --- /dev/null +++ b/test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log @@ -0,0 +1,56 @@ 
+2025-06-01T10:02:08+00:00 +Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 1.06, 1.49, 2.37 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 21.6 ms 21.6 ms 31 +MLIR_Conv2D/1 66.7 ms 66.7 ms 11 +Buddy_Conv2D/1 6.12 ms 6.12 ms 114 +Buddy_Corr2D_Constant_Padding/1 4.65 ms 4.65 ms 151 +OpenCV_Filter2D_Constant_Padding/1 8.60 ms 8.60 ms 81 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.143 ms 0.143 ms 4847 +Buddy_Resize2D_Bilinear_Interpolation/1 0.261 ms 0.261 ms 2686 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 105138 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 49889 +Buddy_Erosion2D_Constant_Padding/1 0.213 ms 0.213 ms 3257 +Buddy_Dilation2D_Constant_Padding/1 0.213 ms 0.213 ms 3242 +Buddy_Opening2D_Constant_Padding/1 0.313 ms 0.313 ms 2237 +Buddy_Closing2D_Constant_Padding/1 0.318 ms 0.318 ms 2232 +Buddy_TopHat2D_Constant_Padding/1 0.775 ms 0.775 ms 853 +Buddy_BottomHat2D_Constant_Padding/1 0.788 ms 0.788 ms 846 +OpenCV_Erode2D_Constant_Padding/1 0.138 ms 0.138 ms 5075 +OpenCV_Opening2D_Constant_Padding/1 0.232 ms 0.232 ms 3015 +OpenCV_Closing2D_Constant_Padding/1 0.225 ms 0.225 ms 3114 +OpenCV_TopHat2D_Constant_Padding/1 0.264 ms 0.264 ms 2647 +OpenCV_BottomHat2D_Constant_Padding/1 0.262 ms 0.262 ms 2672 +OpenCV_MorphGrad2D_Constant_Padding/1 0.255 ms 0.255 ms 2749 +OpenCV_Dilate2D_Constant_Padding/1 0.137 ms 0.137 ms 5111 +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. 
+Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. diff --git a/test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json old mode 100755 new mode 100644 index 34b97b05..5e6f1f5e --- a/test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json +++ b/test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-05-26T22:16:26+00:00", + "date": "2025-06-01T10:02:32+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [3.6958,13.6172,10.3042], + "load_avg": [1.03955,1.44629,2.33398], "library_build_type": "release" }, "benchmarks": [ @@ -46,8 +46,8 @@ "repetition_index": 0, "threads": 1, "iterations": 32, - "real_time": 2.2072315390687436e+01, - "cpu_time": 2.2071622781250003e+01, + "real_time": 2.1539924258831888e+01, + "cpu_time": 2.1539544500000002e+01, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 10, - "real_time": 6.8203160911798477e+01, - "cpu_time": 6.8199147800000006e+01, + "iterations": 11, + "real_time": 6.6645104946060613e+01, + "cpu_time": 6.6641695999999982e+01, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 110, - "real_time": 6.3550283455035901e+00, - "cpu_time": 6.3548004909090885e+00, + "iterations": 114, + "real_time": 6.1256526397508484e+00, + "cpu_time": 6.1255026578947369e+00, "time_unit": "ms" }, { @@ -87,9 +87,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 
1, - "iterations": 149, - "real_time": 4.7013974254763369e+00, - "cpu_time": 4.7011444362416119e+00, + "iterations": 151, + "real_time": 4.6481645561211948e+00, + "cpu_time": 4.6480527218543033e+00, "time_unit": "ms" }, { @@ -101,9 +101,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 80, - "real_time": 8.8078494882211089e+00, - "cpu_time": 8.8076434625000033e+00, + "iterations": 81, + "real_time": 8.5967951396733149e+00, + "cpu_time": 8.5964210617283996e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4781, - "real_time": 1.4572581434082821e-01, - "cpu_time": 1.4571423677055004e-01, + "iterations": 4857, + "real_time": 1.4274283807931035e-01, + "cpu_time": 1.4273323533045087e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2627, - "real_time": 2.6675003648462248e-01, - "cpu_time": 2.6674071488389783e-01, + "iterations": 2693, + "real_time": 2.6048750316083008e-01, + "cpu_time": 2.6047936019309320e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 102862, - "real_time": 6.8005258963692670e-03, - "cpu_time": 6.8001470805545260e-03, + "iterations": 105362, + "real_time": 6.6484719802228963e-03, + "cpu_time": 6.6481730130407476e-03, "time_unit": "ms" }, { @@ -157,9 +157,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 48877, - "real_time": 1.4313261689290913e-02, - "cpu_time": 1.4312918346052323e-02, + "iterations": 49959, + "real_time": 1.4013711762532320e-02, + "cpu_time": 1.4013170539842676e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3093, - "real_time": 2.2363355470597840e-01, - "cpu_time": 2.2361846039443906e-01, + "iterations": 3220, + "real_time": 2.1420254296860339e-01, + "cpu_time": 2.1418366801242208e-01, "time_unit": "ms" }, { 
@@ -185,9 +185,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3107, - "real_time": 2.2298299821676992e-01, - "cpu_time": 2.2297160830383037e-01, + "iterations": 3263, + "real_time": 2.1453051982830537e-01, + "cpu_time": 2.1452533159669049e-01, "time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2034, - "real_time": 3.4434181585863982e-01, - "cpu_time": 3.4430977974434573e-01, + "iterations": 2246, + "real_time": 3.0990404044530906e-01, + "cpu_time": 3.0988853205699018e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1989, - "real_time": 3.4222938963191002e-01, - "cpu_time": 3.4221931523378590e-01, + "iterations": 2217, + "real_time": 3.1024105527619278e-01, + "cpu_time": 3.1023018267929692e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 754, - "real_time": 8.9190859070902473e-01, - "cpu_time": 8.9187836339522575e-01, + "iterations": 828, + "real_time": 7.7828986948167067e-01, + "cpu_time": 7.7822905797101383e-01, "time_unit": "ms" }, { @@ -241,9 +241,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 736, - "real_time": 9.1232408000075293e-01, - "cpu_time": 9.1226822146739139e-01, + "iterations": 833, + "real_time": 7.9281703040164775e-01, + "cpu_time": 7.9278437575029825e-01, "time_unit": "ms" }, { @@ -255,9 +255,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5028, - "real_time": 1.3946793683009576e-01, - "cpu_time": 1.3945602704852819e-01, + "iterations": 5129, + "real_time": 1.3618203899413667e-01, + "cpu_time": 1.3617622031585130e-01, "time_unit": "ms" }, { @@ -269,9 +269,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3031, - "real_time": 2.2504380306506150e-01, - "cpu_time": 2.2503673309138941e-01, + "iterations": 3091, + "real_time": 
2.2648242960582463e-01, + "cpu_time": 2.2647447460368761e-01, "time_unit": "ms" }, { @@ -283,9 +283,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3131, - "real_time": 2.2496341949870738e-01, - "cpu_time": 2.2495109709358108e-01, + "iterations": 3096, + "real_time": 2.2607082164141251e-01, + "cpu_time": 2.2606457816537487e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2697, - "real_time": 2.5882281868555918e-01, - "cpu_time": 2.5881240934371474e-01, + "iterations": 2677, + "real_time": 2.6060663286122004e-01, + "cpu_time": 2.6059899327605501e-01, "time_unit": "ms" }, { @@ -311,9 +311,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2706, - "real_time": 2.5917346474314651e-01, - "cpu_time": 2.5916015003695453e-01, + "iterations": 2684, + "real_time": 2.6077424813111921e-01, + "cpu_time": 2.6076512742175839e-01, "time_unit": "ms" }, { @@ -325,9 +325,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2674, - "real_time": 2.6137103022214808e-01, - "cpu_time": 2.6136243231114431e-01, + "iterations": 2763, + "real_time": 2.5325366998663684e-01, + "cpu_time": 2.5324759645313089e-01, "time_unit": "ms" }, { @@ -339,9 +339,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4993, - "real_time": 1.5533545225912407e-01, - "cpu_time": 1.5532848247546552e-01, + "iterations": 5132, + "real_time": 1.3630549929345018e-01, + "cpu_time": 1.3630290568979009e-01, "time_unit": "ms" } ] diff --git a/test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log b/test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log new file mode 100644 index 00000000..5610a081 --- /dev/null +++ b/test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log @@ -0,0 +1,56 @@ +2025-06-01T10:02:32+00:00 
+Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 1.04, 1.45, 2.33 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 21.5 ms 21.5 ms 32 +MLIR_Conv2D/1 66.6 ms 66.6 ms 11 +Buddy_Conv2D/1 6.13 ms 6.13 ms 114 +Buddy_Corr2D_Constant_Padding/1 4.65 ms 4.65 ms 151 +OpenCV_Filter2D_Constant_Padding/1 8.60 ms 8.60 ms 81 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.143 ms 0.143 ms 4857 +Buddy_Resize2D_Bilinear_Interpolation/1 0.260 ms 0.260 ms 2693 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 105362 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 49959 +Buddy_Erosion2D_Constant_Padding/1 0.214 ms 0.214 ms 3220 +Buddy_Dilation2D_Constant_Padding/1 0.215 ms 0.215 ms 3263 +Buddy_Opening2D_Constant_Padding/1 0.310 ms 0.310 ms 2246 +Buddy_Closing2D_Constant_Padding/1 0.310 ms 0.310 ms 2217 +Buddy_TopHat2D_Constant_Padding/1 0.778 ms 0.778 ms 828 +Buddy_BottomHat2D_Constant_Padding/1 0.793 ms 0.793 ms 833 +OpenCV_Erode2D_Constant_Padding/1 0.136 ms 0.136 ms 5129 +OpenCV_Opening2D_Constant_Padding/1 0.226 ms 0.226 ms 3091 +OpenCV_Closing2D_Constant_Padding/1 0.226 ms 0.226 ms 3096 +OpenCV_TopHat2D_Constant_Padding/1 0.261 ms 0.261 ms 2677 +OpenCV_BottomHat2D_Constant_Padding/1 0.261 ms 0.261 ms 2684 +OpenCV_MorphGrad2D_Constant_Padding/1 0.253 ms 0.253 ms 2763 +OpenCV_Dilate2D_Constant_Padding/1 0.136 ms 0.136 ms 5132 +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. 
+Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. diff --git a/test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json old mode 100755 new mode 100644 index dec55d36..f7895fce --- a/test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json +++ b/test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-05-26T22:16:50+00:00", + "date": "2025-06-01T10:02:56+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [3.11621,12.6836,10.0825], + "load_avg": [1.02783,1.4165,2.30469], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 20, - "real_time": 3.5189422406256199e+01, - "cpu_time": 3.5187518800000007e+01, + "iterations": 17, + "real_time": 4.1542540359146457e+01, + "cpu_time": 4.1541568352941177e+01, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 6, - "real_time": 1.2122860985497634e+02, - "cpu_time": 1.2122526516666669e+02, + "iterations": 5, + "real_time": 1.4357046708464622e+02, + "cpu_time": 1.4356582000000003e+02, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 64, - "real_time": 1.0844544944120571e+01, - "cpu_time": 1.0844293578125004e+01, + "iterations": 67, + "real_time": 1.0501373142226418e+01, + "cpu_time": 1.0501049492537312e+01, "time_unit": "ms" }, { @@ -87,9 +87,9 @@ "repetitions": 1, 
"repetition_index": 0, "threads": 1, - "iterations": 89, - "real_time": 7.9097353298677486e+00, - "cpu_time": 7.9092355393258478e+00, + "iterations": 90, + "real_time": 7.9487750720646648e+00, + "cpu_time": 7.9483699222222235e+00, "time_unit": "ms" }, { @@ -101,9 +101,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 118, - "real_time": 5.9020572452474447e+00, - "cpu_time": 5.9018793644067769e+00, + "iterations": 120, + "real_time": 5.8866093711306648e+00, + "cpu_time": 5.8862758833333402e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4772, - "real_time": 1.4571150201189348e-01, - "cpu_time": 1.4570317455993292e-01, + "iterations": 4856, + "real_time": 1.4254383748946511e-01, + "cpu_time": 1.4253524814662261e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2626, - "real_time": 2.6634765004417138e-01, - "cpu_time": 2.6633460662604719e-01, + "iterations": 2692, + "real_time": 2.6072924098700151e-01, + "cpu_time": 2.6071364598811275e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 102812, - "real_time": 6.8068346751817806e-03, - "cpu_time": 6.8065919639730731e-03, + "iterations": 105165, + "real_time": 6.6590586224041202e-03, + "cpu_time": 6.6586980934721625e-03, "time_unit": "ms" }, { @@ -157,9 +157,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 48301, - "real_time": 1.4475951286809114e-02, - "cpu_time": 1.4475346970042036e-02, + "iterations": 49405, + "real_time": 1.4179655594956524e-02, + "cpu_time": 1.4179255581418873e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3183, - "real_time": 2.1950167024749800e-01, - "cpu_time": 2.1948676625824695e-01, + "iterations": 3218, + "real_time": 2.1431234713257136e-01, + "cpu_time": 
2.1430480049720282e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3163, - "real_time": 2.1986513941245783e-01, - "cpu_time": 2.1985423174201710e-01, + "iterations": 3226, + "real_time": 2.1530301595927762e-01, + "cpu_time": 2.1529161376317418e-01, "time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2017, - "real_time": 3.3586876661723120e-01, - "cpu_time": 3.3583024392662403e-01, + "iterations": 2175, + "real_time": 3.1200053020455371e-01, + "cpu_time": 3.1198446252873530e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2024, - "real_time": 3.5512142928810458e-01, - "cpu_time": 3.5510094416996085e-01, + "iterations": 2264, + "real_time": 3.1231050609936772e-01, + "cpu_time": 3.1229766696113065e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 749, - "real_time": 9.0315306835244591e-01, - "cpu_time": 9.0306743658211086e-01, + "iterations": 843, + "real_time": 8.2120473230365898e-01, + "cpu_time": 8.2114858362989307e-01, "time_unit": "ms" }, { @@ -241,9 +241,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 730, - "real_time": 9.0755133392059639e-01, - "cpu_time": 9.0750758493150741e-01, + "iterations": 844, + "real_time": 8.1806024757182993e-01, + "cpu_time": 8.1802246800947831e-01, "time_unit": "ms" }, { @@ -255,9 +255,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5038, - "real_time": 1.3894656190220539e-01, - "cpu_time": 1.3893922211194934e-01, + "iterations": 5142, + "real_time": 1.3589408018277435e-01, + "cpu_time": 1.3588349844418496e-01, "time_unit": "ms" }, { @@ -269,9 +269,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3212, - "real_time": 2.2129651453638285e-01, - "cpu_time": 2.2129014912826861e-01, + 
"iterations": 3174, + "real_time": 2.2059779499188564e-01, + "cpu_time": 2.2059140012602413e-01, "time_unit": "ms" }, { @@ -283,9 +283,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3171, - "real_time": 2.2047466810796210e-01, - "cpu_time": 2.2046259854935385e-01, + "iterations": 3164, + "real_time": 2.2124412249392264e-01, + "cpu_time": 2.2124253350189718e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2701, - "real_time": 2.5345145365080540e-01, - "cpu_time": 2.5344063087745250e-01, + "iterations": 2735, + "real_time": 2.5577826435648765e-01, + "cpu_time": 2.5577077038391222e-01, "time_unit": "ms" }, { @@ -311,9 +311,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2755, - "real_time": 2.5492152249856348e-01, - "cpu_time": 2.5491474192377500e-01, + "iterations": 2710, + "real_time": 2.5817889005385641e-01, + "cpu_time": 2.5817290959409672e-01, "time_unit": "ms" }, { @@ -325,9 +325,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2727, - "real_time": 2.5723294054779938e-01, - "cpu_time": 2.5722383608360888e-01, + "iterations": 2787, + "real_time": 2.5088470770032079e-01, + "cpu_time": 2.5087844312881219e-01, "time_unit": "ms" }, { @@ -339,9 +339,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5053, - "real_time": 1.3811498115127827e-01, - "cpu_time": 1.3810704155946990e-01, + "iterations": 5117, + "real_time": 1.3666728594083211e-01, + "cpu_time": 1.3666393453195239e-01, "time_unit": "ms" } ] diff --git a/test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log b/test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log new file mode 100644 index 00000000..f707d6a0 --- /dev/null +++ b/test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log @@ -0,0 +1,56 
@@ +2025-06-01T10:02:56+00:00 +Running ./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 1.03, 1.42, 2.30 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 41.5 ms 41.5 ms 17 +MLIR_Conv2D/1 144 ms 144 ms 5 +Buddy_Conv2D/1 10.5 ms 10.5 ms 67 +Buddy_Corr2D_Constant_Padding/1 7.95 ms 7.95 ms 90 +OpenCV_Filter2D_Constant_Padding/1 5.89 ms 5.89 ms 120 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.143 ms 0.143 ms 4856 +Buddy_Resize2D_Bilinear_Interpolation/1 0.261 ms 0.261 ms 2692 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 105165 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 49405 +Buddy_Erosion2D_Constant_Padding/1 0.214 ms 0.214 ms 3218 +Buddy_Dilation2D_Constant_Padding/1 0.215 ms 0.215 ms 3226 +Buddy_Opening2D_Constant_Padding/1 0.312 ms 0.312 ms 2175 +Buddy_Closing2D_Constant_Padding/1 0.312 ms 0.312 ms 2264 +Buddy_TopHat2D_Constant_Padding/1 0.821 ms 0.821 ms 843 +Buddy_BottomHat2D_Constant_Padding/1 0.818 ms 0.818 ms 844 +OpenCV_Erode2D_Constant_Padding/1 0.136 ms 0.136 ms 5142 +OpenCV_Opening2D_Constant_Padding/1 0.221 ms 0.221 ms 3174 +OpenCV_Closing2D_Constant_Padding/1 0.221 ms 0.221 ms 3164 +OpenCV_TopHat2D_Constant_Padding/1 0.256 ms 0.256 ms 2735 +OpenCV_BottomHat2D_Constant_Padding/1 0.258 ms 0.258 ms 2710 +OpenCV_MorphGrad2D_Constant_Padding/1 0.251 ms 0.251 ms 2787 +OpenCV_Dilate2D_Constant_Padding/1 0.137 ms 0.137 ms 5117 +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. 
+Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. diff --git a/test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json old mode 100755 new mode 100644 index c85f8aec..7aeb0253 --- a/test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json +++ b/test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-05-26T22:17:14+00:00", + "date": "2025-06-01T10:03:20+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [2.73438,11.8242,9.8667], + "load_avg": [1.01709,1.38184,2.26807], "library_build_type": "release" }, "benchmarks": [ @@ -46,8 +46,8 @@ "repetition_index": 0, "threads": 1, "iterations": 20, - "real_time": 3.5257031954824924e+01, - "cpu_time": 3.5254986799999998e+01, + "real_time": 3.4217556379735470e+01, + "cpu_time": 3.4216377350000002e+01, "time_unit": "ms" }, { @@ -60,8 +60,8 @@ "repetition_index": 0, "threads": 1, "iterations": 6, - "real_time": 1.2104108215620120e+02, - "cpu_time": 1.2103646300000001e+02, + "real_time": 1.1868627804021041e+02, + "cpu_time": 1.1867920516666668e+02, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 64, - "real_time": 1.0882732516620308e+01, - "cpu_time": 1.0882239125000005e+01, + "iterations": 67, + "real_time": 1.0498678089300199e+01, + "cpu_time": 1.0498225208955224e+01, "time_unit": "ms" }, { @@ -87,9 +87,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 88, - "real_time": 
7.9285275106402961e+00, - "cpu_time": 7.9281318181818197e+00, + "iterations": 90, + "real_time": 7.8919309501846628e+00, + "cpu_time": 7.8916826222222225e+00, "time_unit": "ms" }, { @@ -101,9 +101,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 118, - "real_time": 5.9106075757388341e+00, - "cpu_time": 5.9102044830508484e+00, + "iterations": 119, + "real_time": 5.8879929305124685e+00, + "cpu_time": 5.8877909243697495e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4703, - "real_time": 1.4615413843127936e-01, - "cpu_time": 1.4614629045290239e-01, + "iterations": 4857, + "real_time": 1.4250408969274980e-01, + "cpu_time": 1.4250040910026768e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2264, - "real_time": 2.7192003906510320e-01, - "cpu_time": 2.7190400795052999e-01, + "iterations": 2690, + "real_time": 2.6047558395614412e-01, + "cpu_time": 2.6046792565055787e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 103210, - "real_time": 6.7838188378100085e-03, - "cpu_time": 6.7834774924910354e-03, + "iterations": 105068, + "real_time": 6.6534977810603496e-03, + "cpu_time": 6.6533541230441263e-03, "time_unit": "ms" }, { @@ -157,9 +157,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 48545, - "real_time": 1.4473049482143629e-02, - "cpu_time": 1.4472298918529192e-02, + "iterations": 49449, + "real_time": 1.4174878487268949e-02, + "cpu_time": 1.4174551719953883e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3132, - "real_time": 2.2119966527092655e-01, - "cpu_time": 2.2118326660280993e-01, + "iterations": 3244, + "real_time": 2.1404832539535626e-01, + "cpu_time": 2.1404216307028326e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 1, 
"repetition_index": 0, "threads": 1, - "iterations": 3160, - "real_time": 2.2111865279229381e-01, - "cpu_time": 2.2110744240506322e-01, + "iterations": 3243, + "real_time": 2.2520982659491645e-01, + "cpu_time": 2.2519997533148287e-01, "time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2027, - "real_time": 3.3099344161030808e-01, - "cpu_time": 3.3097353527380324e-01, + "iterations": 2260, + "real_time": 3.0706161385880110e-01, + "cpu_time": 3.0704632876106208e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2017, - "real_time": 3.5027682275156247e-01, - "cpu_time": 3.5026231036192423e-01, + "iterations": 2223, + "real_time": 3.1307990965653365e-01, + "cpu_time": 3.1306463832658527e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 750, - "real_time": 9.0147645026445389e-01, - "cpu_time": 9.0144423999999890e-01, + "iterations": 827, + "real_time": 8.1769454246781037e-01, + "cpu_time": 8.1765177992745008e-01, "time_unit": "ms" }, { @@ -241,9 +241,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 761, - "real_time": 8.7758632188322672e-01, - "cpu_time": 8.7753931011826602e-01, + "iterations": 861, + "real_time": 7.9650212507630058e-01, + "cpu_time": 7.9645093612078921e-01, "time_unit": "ms" }, { @@ -255,9 +255,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5027, - "real_time": 1.3902871209037976e-01, - "cpu_time": 1.3902262721304959e-01, + "iterations": 5101, + "real_time": 1.3665298364765854e-01, + "cpu_time": 1.3664968535581246e-01, "time_unit": "ms" }, { @@ -269,9 +269,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3038, - "real_time": 2.3011283285626460e-01, - "cpu_time": 2.3010441968400233e-01, + "iterations": 3187, + "real_time": 2.1934081926815630e-01, + "cpu_time": 
2.1933629055538131e-01, "time_unit": "ms" }, { @@ -283,9 +283,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3072, - "real_time": 2.2764660631461689e-01, - "cpu_time": 2.2763665071614628e-01, + "iterations": 3142, + "real_time": 2.2223215659263407e-01, + "cpu_time": 2.2222677021005652e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2693, - "real_time": 2.6117778368324673e-01, - "cpu_time": 2.6115971258819154e-01, + "iterations": 2731, + "real_time": 2.5569188675571386e-01, + "cpu_time": 2.5568335042109103e-01, "time_unit": "ms" }, { @@ -311,9 +311,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2663, - "real_time": 2.6503028407541829e-01, - "cpu_time": 2.6501603755163383e-01, + "iterations": 2740, + "real_time": 2.5545844373150461e-01, + "cpu_time": 2.5545215291970780e-01, "time_unit": "ms" }, { @@ -325,9 +325,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2707, - "real_time": 2.5905529948830119e-01, - "cpu_time": 2.5904600775766579e-01, + "iterations": 2815, + "real_time": 2.4856514103882690e-01, + "cpu_time": 2.4855898934280626e-01, "time_unit": "ms" }, { @@ -339,9 +339,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5003, - "real_time": 1.3957380974457165e-01, - "cpu_time": 1.3957011373176126e-01, + "iterations": 5206, + "real_time": 1.3451610773778905e-01, + "cpu_time": 1.3450989838647703e-01, "time_unit": "ms" } ] diff --git a/test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log b/test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log new file mode 100644 index 00000000..f576c1e9 --- /dev/null +++ b/test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log @@ -0,0 +1,56 @@ +2025-06-01T10:03:20+00:00 +Running 
./bin/image-processing-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 1.02, 1.38, 2.27 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------------------- +Eigen_Convolve2D/1 34.2 ms 34.2 ms 20 +MLIR_Conv2D/1 119 ms 119 ms 6 +Buddy_Conv2D/1 10.5 ms 10.5 ms 67 +Buddy_Corr2D_Constant_Padding/1 7.89 ms 7.89 ms 90 +OpenCV_Filter2D_Constant_Padding/1 5.89 ms 5.89 ms 119 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.143 ms 0.143 ms 4857 +Buddy_Resize2D_Bilinear_Interpolation/1 0.260 ms 0.260 ms 2690 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 105068 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 49449 +Buddy_Erosion2D_Constant_Padding/1 0.214 ms 0.214 ms 3244 +Buddy_Dilation2D_Constant_Padding/1 0.225 ms 0.225 ms 3243 +Buddy_Opening2D_Constant_Padding/1 0.307 ms 0.307 ms 2260 +Buddy_Closing2D_Constant_Padding/1 0.313 ms 0.313 ms 2223 +Buddy_TopHat2D_Constant_Padding/1 0.818 ms 0.818 ms 827 +Buddy_BottomHat2D_Constant_Padding/1 0.797 ms 0.796 ms 861 +OpenCV_Erode2D_Constant_Padding/1 0.137 ms 0.137 ms 5101 +OpenCV_Opening2D_Constant_Padding/1 0.219 ms 0.219 ms 3187 +OpenCV_Closing2D_Constant_Padding/1 0.222 ms 0.222 ms 3142 +OpenCV_TopHat2D_Constant_Padding/1 0.256 ms 0.256 ms 2731 +OpenCV_BottomHat2D_Constant_Padding/1 0.255 ms 0.255 ms 2740 +OpenCV_MorphGrad2D_Constant_Padding/1 0.249 ms 0.249 ms 2815 +OpenCV_Dilate2D_Constant_Padding/1 0.135 ms 0.135 ms 5206 +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. 
+Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. diff --git a/test_result/imageprocessing/image-processing-result.log b/test_result/imageprocessing/image-processing-result.log old mode 100755 new mode 100644 index b7564e13..3a6bab16 --- a/test_result/imageprocessing/image-processing-result.log +++ b/test_result/imageprocessing/image-processing-result.log @@ -1,1000 +1,66 @@ -Benchmark results - Mon May 26 22:13:36 UTC 2025 +Benchmark results - Sun Jun 1 09:59:37 UTC 2025 Testing SSE support SSE is supported. Running image-processing-benchmark for SSE Running: ../benchmarks/ImageProcessing/Images/YuTu.png prewittKernelAlign random3x3KernelAlignInt CONSTANT_PADDING -2025-05-26T22:13:36+00:00 -Running ./bin/image-processing-benchmark -Run on (24 X 5100 MHz CPU s) -CPU Caches: - L1 Data 48 KiB (x12) - L1 Instruction 32 KiB (x12) - L2 Unified 1280 KiB (x12) - L3 Unified 30720 KiB (x1) -Load Average: 30.76, 22.52, 11.98 -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
--------------------------------------------------------------------------------------------- -Benchmark Time CPU Iterations --------------------------------------------------------------------------------------------- -Eigen_Convolve2D/1 4.98 ms 4.98 ms 140 -MLIR_Conv2D/1 7.36 ms 7.36 ms 95 -Buddy_Conv2D/1 0.438 ms 0.438 ms 1585 -Buddy_Corr2D_Constant_Padding/1 0.796 ms 0.796 ms 879 -OpenCV_Filter2D_Constant_Padding/1 1.28 ms 1.28 ms 549 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.149 ms 0.149 ms 4708 -Buddy_Resize2D_Bilinear_Interpolation/1 0.267 ms 0.267 ms 2618 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 102964 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 48907 -Buddy_Erosion2D_Constant_Padding/1 0.233 ms 0.233 ms 3019 -Buddy_Dilation2D_Constant_Padding/1 0.231 ms 0.231 ms 3004 -Buddy_Opening2D_Constant_Padding/1 0.335 ms 0.335 ms 2064 -Buddy_Closing2D_Constant_Padding/1 0.339 ms 0.339 ms 2044 -Buddy_TopHat2D_Constant_Padding/1 0.856 ms 0.856 ms 782 -Buddy_BottomHat2D_Constant_Padding/1 0.851 ms 0.851 ms 791 -OpenCV_Erode2D_Constant_Padding/1 0.140 ms 0.140 ms 5012 -OpenCV_Opening2D_Constant_Padding/1 0.215 ms 0.215 ms 3259 -OpenCV_Closing2D_Constant_Padding/1 0.215 ms 0.215 ms 3254 -OpenCV_TopHat2D_Constant_Padding/1 0.249 ms 0.249 ms 2816 -OpenCV_BottomHat2D_Constant_Padding/1 0.249 ms 0.249 ms 2807 -OpenCV_MorphGrad2D_Constant_Padding/1 0.268 ms 0.268 ms 2604 -OpenCV_Dilate2D_Constant_Padding/1 0.141 ms 0.141 ms 4985 +[Success] … Running: ../benchmarks/ImageProcessing/Images/YuTu.png prewittKernelAlign random3x3KernelAlignInt REPLICATE_PADDING -2025-05-26T22:14:00+00:00 -Running ./bin/image-processing-benchmark -Run on (24 X 5100 MHz CPU s) -CPU Caches: - L1 Data 48 KiB (x12) - L1 Instruction 32 KiB (x12) - L2 Unified 1280 KiB (x12) - L3 Unified 30720 KiB (x1) -Load Average: 20.95, 20.87, 11.71 -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra 
overhead. --------------------------------------------------------------------------------------------- -Benchmark Time CPU Iterations --------------------------------------------------------------------------------------------- -Eigen_Convolve2D/1 4.86 ms 4.86 ms 144 -MLIR_Conv2D/1 7.36 ms 7.36 ms 95 -Buddy_Conv2D/1 0.444 ms 0.444 ms 1587 -Buddy_Corr2D_Constant_Padding/1 0.796 ms 0.796 ms 850 -OpenCV_Filter2D_Constant_Padding/1 1.28 ms 1.28 ms 549 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.146 ms 0.146 ms 4759 -Buddy_Resize2D_Bilinear_Interpolation/1 0.267 ms 0.267 ms 2628 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 102952 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 48945 -Buddy_Erosion2D_Constant_Padding/1 0.223 ms 0.223 ms 3245 -Buddy_Dilation2D_Constant_Padding/1 0.231 ms 0.231 ms 2941 -Buddy_Opening2D_Constant_Padding/1 0.364 ms 0.364 ms 1920 -Buddy_Closing2D_Constant_Padding/1 0.363 ms 0.363 ms 1942 -Buddy_TopHat2D_Constant_Padding/1 0.953 ms 0.953 ms 686 -Buddy_BottomHat2D_Constant_Padding/1 1.05 ms 1.05 ms 687 -OpenCV_Erode2D_Constant_Padding/1 0.140 ms 0.140 ms 4813 -OpenCV_Opening2D_Constant_Padding/1 0.221 ms 0.221 ms 3185 -OpenCV_Closing2D_Constant_Padding/1 0.221 ms 0.221 ms 3185 -OpenCV_TopHat2D_Constant_Padding/1 0.254 ms 0.254 ms 2747 -OpenCV_BottomHat2D_Constant_Padding/1 0.254 ms 0.254 ms 2760 -OpenCV_MorphGrad2D_Constant_Padding/1 0.255 ms 0.255 ms 2735 -OpenCV_Dilate2D_Constant_Padding/1 0.142 ms 0.142 ms 4913 +[Success] … Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel3x3KernelAlign random3x3KernelAlignInt CONSTANT_PADDING -2025-05-26T22:14:25+00:00 -Running ./bin/image-processing-benchmark -Run on (24 X 5100 MHz CPU s) -CPU Caches: - L1 Data 48 KiB (x12) - L1 Instruction 32 KiB (x12) - L2 Unified 1280 KiB (x12) - L3 Unified 30720 KiB (x1) -Load Average: 14.48, 19.35, 11.45 -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra 
overhead. --------------------------------------------------------------------------------------------- -Benchmark Time CPU Iterations --------------------------------------------------------------------------------------------- -Eigen_Convolve2D/1 4.87 ms 4.87 ms 143 -MLIR_Conv2D/1 7.38 ms 7.38 ms 95 -Buddy_Conv2D/1 0.526 ms 0.526 ms 1300 -Buddy_Corr2D_Constant_Padding/1 0.818 ms 0.818 ms 860 -OpenCV_Filter2D_Constant_Padding/1 1.28 ms 1.28 ms 548 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.146 ms 0.146 ms 4746 -Buddy_Resize2D_Bilinear_Interpolation/1 0.266 ms 0.266 ms 2630 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 102577 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 48983 -Buddy_Erosion2D_Constant_Padding/1 0.221 ms 0.221 ms 3160 -Buddy_Dilation2D_Constant_Padding/1 0.221 ms 0.221 ms 3155 -Buddy_Opening2D_Constant_Padding/1 0.409 ms 0.409 ms 2114 -Buddy_Closing2D_Constant_Padding/1 0.400 ms 0.400 ms 1704 -Buddy_TopHat2D_Constant_Padding/1 0.980 ms 0.980 ms 677 -Buddy_BottomHat2D_Constant_Padding/1 0.972 ms 0.972 ms 684 -OpenCV_Erode2D_Constant_Padding/1 0.138 ms 0.138 ms 5055 -OpenCV_Opening2D_Constant_Padding/1 0.213 ms 0.213 ms 3286 -OpenCV_Closing2D_Constant_Padding/1 0.216 ms 0.216 ms 3307 -OpenCV_TopHat2D_Constant_Padding/1 0.243 ms 0.243 ms 2840 -OpenCV_BottomHat2D_Constant_Padding/1 0.243 ms 0.243 ms 2838 -OpenCV_MorphGrad2D_Constant_Padding/1 0.255 ms 0.255 ms 2741 -OpenCV_Dilate2D_Constant_Padding/1 0.139 ms 0.139 ms 5019 +[Success] … Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel3x3KernelAlign random3x3KernelAlignInt REPLICATE_PADDING -2025-05-26T22:14:49+00:00 -Running ./bin/image-processing-benchmark -Run on (24 X 5100 MHz CPU s) -CPU Caches: - L1 Data 48 KiB (x12) - L1 Instruction 32 KiB (x12) - L2 Unified 1280 KiB (x12) - L3 Unified 30720 KiB (x1) -Load Average: 10.30, 17.97, 11.20 -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur 
extra overhead. --------------------------------------------------------------------------------------------- -Benchmark Time CPU Iterations --------------------------------------------------------------------------------------------- -Eigen_Convolve2D/1 4.87 ms 4.87 ms 143 -MLIR_Conv2D/1 7.38 ms 7.38 ms 95 -Buddy_Conv2D/1 0.527 ms 0.527 ms 1375 -Buddy_Corr2D_Constant_Padding/1 0.816 ms 0.816 ms 857 -OpenCV_Filter2D_Constant_Padding/1 1.28 ms 1.28 ms 548 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.146 ms 0.146 ms 4712 -Buddy_Resize2D_Bilinear_Interpolation/1 0.267 ms 0.267 ms 2626 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 103057 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 48615 -Buddy_Erosion2D_Constant_Padding/1 0.221 ms 0.221 ms 3174 -Buddy_Dilation2D_Constant_Padding/1 0.220 ms 0.220 ms 3168 -Buddy_Opening2D_Constant_Padding/1 0.329 ms 0.329 ms 2081 -Buddy_Closing2D_Constant_Padding/1 0.328 ms 0.328 ms 2123 -Buddy_TopHat2D_Constant_Padding/1 0.885 ms 0.885 ms 787 -Buddy_BottomHat2D_Constant_Padding/1 0.864 ms 0.864 ms 762 -OpenCV_Erode2D_Constant_Padding/1 0.139 ms 0.139 ms 5037 -OpenCV_Opening2D_Constant_Padding/1 0.223 ms 0.223 ms 3118 -OpenCV_Closing2D_Constant_Padding/1 0.223 ms 0.223 ms 3165 -OpenCV_TopHat2D_Constant_Padding/1 0.255 ms 0.255 ms 2680 -OpenCV_BottomHat2D_Constant_Padding/1 0.258 ms 0.258 ms 2756 -OpenCV_MorphGrad2D_Constant_Padding/1 0.282 ms 0.282 ms 2733 -OpenCV_Dilate2D_Constant_Padding/1 0.140 ms 0.140 ms 4846 +[Success] … Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel5x5KernelAlign random3x3KernelAlignInt CONSTANT_PADDING -2025-05-26T22:15:13+00:00 -Running ./bin/image-processing-benchmark -Run on (24 X 5100 MHz CPU s) -CPU Caches: - L1 Data 48 KiB (x12) - L1 Instruction 32 KiB (x12) - L2 Unified 1280 KiB (x12) - L3 Unified 30720 KiB (x1) -Load Average: 7.94, 16.94, 11.01 -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur 
extra overhead. --------------------------------------------------------------------------------------------- -Benchmark Time CPU Iterations --------------------------------------------------------------------------------------------- -Eigen_Convolve2D/1 11.8 ms 11.8 ms 59 -MLIR_Conv2D/1 29.9 ms 29.9 ms 24 -Buddy_Conv2D/1 3.12 ms 3.12 ms 225 -Buddy_Corr2D_Constant_Padding/1 2.36 ms 2.36 ms 299 -OpenCV_Filter2D_Constant_Padding/1 4.20 ms 4.20 ms 167 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.146 ms 0.146 ms 4718 -Buddy_Resize2D_Bilinear_Interpolation/1 0.267 ms 0.267 ms 2624 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 102278 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47123 -Buddy_Erosion2D_Constant_Padding/1 0.224 ms 0.224 ms 3147 -Buddy_Dilation2D_Constant_Padding/1 0.221 ms 0.221 ms 3121 -Buddy_Opening2D_Constant_Padding/1 0.345 ms 0.345 ms 2029 -Buddy_Closing2D_Constant_Padding/1 0.340 ms 0.340 ms 2088 -Buddy_TopHat2D_Constant_Padding/1 0.902 ms 0.902 ms 755 -Buddy_BottomHat2D_Constant_Padding/1 0.903 ms 0.903 ms 740 -OpenCV_Erode2D_Constant_Padding/1 0.138 ms 0.138 ms 5077 -OpenCV_Opening2D_Constant_Padding/1 0.230 ms 0.230 ms 3073 -OpenCV_Closing2D_Constant_Padding/1 0.227 ms 0.227 ms 3064 -OpenCV_TopHat2D_Constant_Padding/1 0.260 ms 0.260 ms 2696 -OpenCV_BottomHat2D_Constant_Padding/1 0.260 ms 0.260 ms 2699 -OpenCV_MorphGrad2D_Constant_Padding/1 0.260 ms 0.260 ms 2689 -OpenCV_Dilate2D_Constant_Padding/1 0.140 ms 0.140 ms 5007 +[Success] … Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel5x5KernelAlign random3x3KernelAlignInt REPLICATE_PADDING -2025-05-26T22:15:37+00:00 -Running ./bin/image-processing-benchmark -Run on (24 X 5100 MHz CPU s) -CPU Caches: - L1 Data 48 KiB (x12) - L1 Instruction 32 KiB (x12) - L2 Unified 1280 KiB (x12) - L3 Unified 30720 KiB (x1) -Load Average: 5.91, 15.74, 10.77 -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur 
extra overhead. --------------------------------------------------------------------------------------------- -Benchmark Time CPU Iterations --------------------------------------------------------------------------------------------- -Eigen_Convolve2D/1 11.8 ms 11.8 ms 59 -MLIR_Conv2D/1 29.8 ms 29.8 ms 23 -Buddy_Conv2D/1 3.11 ms 3.11 ms 225 -Buddy_Corr2D_Constant_Padding/1 2.36 ms 2.36 ms 297 -OpenCV_Filter2D_Constant_Padding/1 4.20 ms 4.20 ms 165 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.146 ms 0.146 ms 4749 -Buddy_Resize2D_Bilinear_Interpolation/1 0.266 ms 0.266 ms 2628 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 102987 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 48924 -Buddy_Erosion2D_Constant_Padding/1 0.222 ms 0.222 ms 3176 -Buddy_Dilation2D_Constant_Padding/1 0.222 ms 0.222 ms 3152 -Buddy_Opening2D_Constant_Padding/1 0.359 ms 0.359 ms 1998 -Buddy_Closing2D_Constant_Padding/1 0.336 ms 0.336 ms 1943 -Buddy_TopHat2D_Constant_Padding/1 0.909 ms 0.909 ms 750 -Buddy_BottomHat2D_Constant_Padding/1 0.912 ms 0.912 ms 737 -OpenCV_Erode2D_Constant_Padding/1 0.139 ms 0.139 ms 5030 -OpenCV_Opening2D_Constant_Padding/1 0.230 ms 0.230 ms 3017 -OpenCV_Closing2D_Constant_Padding/1 0.223 ms 0.223 ms 3170 -OpenCV_TopHat2D_Constant_Padding/1 0.267 ms 0.267 ms 2657 -OpenCV_BottomHat2D_Constant_Padding/1 0.262 ms 0.262 ms 2690 -OpenCV_MorphGrad2D_Constant_Padding/1 0.262 ms 0.262 ms 2671 -OpenCV_Dilate2D_Constant_Padding/1 0.143 ms 0.143 ms 4878 +[Success] … Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel7x7KernelAlign random3x3KernelAlignInt CONSTANT_PADDING -2025-05-26T22:16:02+00:00 -Running ./bin/image-processing-benchmark -Run on (24 X 5100 MHz CPU s) -CPU Caches: - L1 Data 48 KiB (x12) - L1 Instruction 32 KiB (x12) - L2 Unified 1280 KiB (x12) - L3 Unified 30720 KiB (x1) -Load Average: 4.58, 14.63, 10.53 -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra 
overhead. --------------------------------------------------------------------------------------------- -Benchmark Time CPU Iterations --------------------------------------------------------------------------------------------- -Eigen_Convolve2D/1 22.0 ms 22.0 ms 32 -MLIR_Conv2D/1 68.1 ms 68.1 ms 10 -Buddy_Conv2D/1 6.34 ms 6.34 ms 109 -Buddy_Corr2D_Constant_Padding/1 4.70 ms 4.70 ms 148 -OpenCV_Filter2D_Constant_Padding/1 8.80 ms 8.80 ms 79 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.146 ms 0.146 ms 4771 -Buddy_Resize2D_Bilinear_Interpolation/1 0.266 ms 0.266 ms 2626 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 103087 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 48675 -Buddy_Erosion2D_Constant_Padding/1 0.220 ms 0.220 ms 3188 -Buddy_Dilation2D_Constant_Padding/1 0.221 ms 0.221 ms 3182 -Buddy_Opening2D_Constant_Padding/1 0.334 ms 0.334 ms 2146 -Buddy_Closing2D_Constant_Padding/1 0.394 ms 0.394 ms 2142 -Buddy_TopHat2D_Constant_Padding/1 0.901 ms 0.901 ms 745 -Buddy_BottomHat2D_Constant_Padding/1 0.918 ms 0.918 ms 722 -OpenCV_Erode2D_Constant_Padding/1 0.138 ms 0.138 ms 5046 -OpenCV_Opening2D_Constant_Padding/1 0.224 ms 0.224 ms 3129 -OpenCV_Closing2D_Constant_Padding/1 0.227 ms 0.227 ms 3078 -OpenCV_TopHat2D_Constant_Padding/1 0.263 ms 0.263 ms 2660 -OpenCV_BottomHat2D_Constant_Padding/1 0.259 ms 0.259 ms 2723 -OpenCV_MorphGrad2D_Constant_Padding/1 0.260 ms 0.260 ms 2685 -OpenCV_Dilate2D_Constant_Padding/1 0.140 ms 0.140 ms 4978 +[Success] … Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel7x7KernelAlign random3x3KernelAlignInt REPLICATE_PADDING -2025-05-26T22:16:26+00:00 -Running ./bin/image-processing-benchmark -Run on (24 X 5100 MHz CPU s) -CPU Caches: - L1 Data 48 KiB (x12) - L1 Instruction 32 KiB (x12) - L2 Unified 1280 KiB (x12) - L3 Unified 30720 KiB (x1) -Load Average: 3.70, 13.62, 10.30 -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra 
overhead. --------------------------------------------------------------------------------------------- -Benchmark Time CPU Iterations --------------------------------------------------------------------------------------------- -Eigen_Convolve2D/1 22.1 ms 22.1 ms 32 -MLIR_Conv2D/1 68.2 ms 68.2 ms 10 -Buddy_Conv2D/1 6.36 ms 6.35 ms 110 -Buddy_Corr2D_Constant_Padding/1 4.70 ms 4.70 ms 149 -OpenCV_Filter2D_Constant_Padding/1 8.81 ms 8.81 ms 80 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.146 ms 0.146 ms 4781 -Buddy_Resize2D_Bilinear_Interpolation/1 0.267 ms 0.267 ms 2627 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 102862 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 48877 -Buddy_Erosion2D_Constant_Padding/1 0.224 ms 0.224 ms 3093 -Buddy_Dilation2D_Constant_Padding/1 0.223 ms 0.223 ms 3107 -Buddy_Opening2D_Constant_Padding/1 0.344 ms 0.344 ms 2034 -Buddy_Closing2D_Constant_Padding/1 0.342 ms 0.342 ms 1989 -Buddy_TopHat2D_Constant_Padding/1 0.892 ms 0.892 ms 754 -Buddy_BottomHat2D_Constant_Padding/1 0.912 ms 0.912 ms 736 -OpenCV_Erode2D_Constant_Padding/1 0.139 ms 0.139 ms 5028 -OpenCV_Opening2D_Constant_Padding/1 0.225 ms 0.225 ms 3031 -OpenCV_Closing2D_Constant_Padding/1 0.225 ms 0.225 ms 3131 -OpenCV_TopHat2D_Constant_Padding/1 0.259 ms 0.259 ms 2697 -OpenCV_BottomHat2D_Constant_Padding/1 0.259 ms 0.259 ms 2706 -OpenCV_MorphGrad2D_Constant_Padding/1 0.261 ms 0.261 ms 2674 -OpenCV_Dilate2D_Constant_Padding/1 0.155 ms 0.155 ms 4993 +[Success] … Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel9x9KernelAlign random3x3KernelAlignInt CONSTANT_PADDING -2025-05-26T22:16:50+00:00 -Running ./bin/image-processing-benchmark -Run on (24 X 5100 MHz CPU s) -CPU Caches: - L1 Data 48 KiB (x12) - L1 Instruction 32 KiB (x12) - L2 Unified 1280 KiB (x12) - L3 Unified 30720 KiB (x1) -Load Average: 3.12, 12.68, 10.08 -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra 
overhead. --------------------------------------------------------------------------------------------- -Benchmark Time CPU Iterations --------------------------------------------------------------------------------------------- -Eigen_Convolve2D/1 35.2 ms 35.2 ms 20 -MLIR_Conv2D/1 121 ms 121 ms 6 -Buddy_Conv2D/1 10.8 ms 10.8 ms 64 -Buddy_Corr2D_Constant_Padding/1 7.91 ms 7.91 ms 89 -OpenCV_Filter2D_Constant_Padding/1 5.90 ms 5.90 ms 118 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.146 ms 0.146 ms 4772 -Buddy_Resize2D_Bilinear_Interpolation/1 0.266 ms 0.266 ms 2626 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 102812 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 48301 -Buddy_Erosion2D_Constant_Padding/1 0.220 ms 0.219 ms 3183 -Buddy_Dilation2D_Constant_Padding/1 0.220 ms 0.220 ms 3163 -Buddy_Opening2D_Constant_Padding/1 0.336 ms 0.336 ms 2017 -Buddy_Closing2D_Constant_Padding/1 0.355 ms 0.355 ms 2024 -Buddy_TopHat2D_Constant_Padding/1 0.903 ms 0.903 ms 749 -Buddy_BottomHat2D_Constant_Padding/1 0.908 ms 0.908 ms 730 -OpenCV_Erode2D_Constant_Padding/1 0.139 ms 0.139 ms 5038 -OpenCV_Opening2D_Constant_Padding/1 0.221 ms 0.221 ms 3212 -OpenCV_Closing2D_Constant_Padding/1 0.220 ms 0.220 ms 3171 -OpenCV_TopHat2D_Constant_Padding/1 0.253 ms 0.253 ms 2701 -OpenCV_BottomHat2D_Constant_Padding/1 0.255 ms 0.255 ms 2755 -OpenCV_MorphGrad2D_Constant_Padding/1 0.257 ms 0.257 ms 2727 -OpenCV_Dilate2D_Constant_Padding/1 0.138 ms 0.138 ms 5053 +[Success] … Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel9x9KernelAlign random3x3KernelAlignInt REPLICATE_PADDING -2025-05-26T22:17:14+00:00 -Running ./bin/image-processing-benchmark -Run on (24 X 5100 MHz CPU s) -CPU Caches: - L1 Data 48 KiB (x12) - L1 Instruction 32 KiB (x12) - L2 Unified 1280 KiB (x12) - L3 Unified 30720 KiB (x1) -Load Average: 2.73, 11.82, 9.87 -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
--------------------------------------------------------------------------------------------- -Benchmark Time CPU Iterations --------------------------------------------------------------------------------------------- -Eigen_Convolve2D/1 35.3 ms 35.3 ms 20 -MLIR_Conv2D/1 121 ms 121 ms 6 -Buddy_Conv2D/1 10.9 ms 10.9 ms 64 -Buddy_Corr2D_Constant_Padding/1 7.93 ms 7.93 ms 88 -OpenCV_Filter2D_Constant_Padding/1 5.91 ms 5.91 ms 118 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.146 ms 0.146 ms 4703 -Buddy_Resize2D_Bilinear_Interpolation/1 0.272 ms 0.272 ms 2264 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 103210 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 48545 -Buddy_Erosion2D_Constant_Padding/1 0.221 ms 0.221 ms 3132 -Buddy_Dilation2D_Constant_Padding/1 0.221 ms 0.221 ms 3160 -Buddy_Opening2D_Constant_Padding/1 0.331 ms 0.331 ms 2027 -Buddy_Closing2D_Constant_Padding/1 0.350 ms 0.350 ms 2017 -Buddy_TopHat2D_Constant_Padding/1 0.901 ms 0.901 ms 750 -Buddy_BottomHat2D_Constant_Padding/1 0.878 ms 0.878 ms 761 -OpenCV_Erode2D_Constant_Padding/1 0.139 ms 0.139 ms 5027 -OpenCV_Opening2D_Constant_Padding/1 0.230 ms 0.230 ms 3038 -OpenCV_Closing2D_Constant_Padding/1 0.228 ms 0.228 ms 3072 -OpenCV_TopHat2D_Constant_Padding/1 0.261 ms 0.261 ms 2693 -OpenCV_BottomHat2D_Constant_Padding/1 0.265 ms 0.265 ms 2663 -OpenCV_MorphGrad2D_Constant_Padding/1 0.259 ms 0.259 ms 2707 -OpenCV_Dilate2D_Constant_Padding/1 0.140 ms 0.140 ms 5003 +[Success] … Running: ../benchmarks/ImageProcessing/Images/YuTu.png laplacianKernelAlign random3x3KernelAlignInt CONSTANT_PADDING -2025-05-26T22:17:38+00:00 -Running ./bin/image-processing-benchmark -Run on (24 X 5100 MHz CPU s) -CPU Caches: - L1 Data 48 KiB (x12) - L1 Instruction 32 KiB (x12) - L2 Unified 1280 KiB (x12) - L3 Unified 30720 KiB (x1) -Load Average: 2.48, 11.03, 9.66 -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
--------------------------------------------------------------------------------------------- -Benchmark Time CPU Iterations --------------------------------------------------------------------------------------------- -Eigen_Convolve2D/1 4.99 ms 4.99 ms 139 -MLIR_Conv2D/1 7.38 ms 7.38 ms 94 -Buddy_Conv2D/1 0.711 ms 0.711 ms 1001 -Buddy_Corr2D_Constant_Padding/1 1.08 ms 1.08 ms 645 -OpenCV_Filter2D_Constant_Padding/1 1.90 ms 1.90 ms 367 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.147 ms 0.147 ms 4717 -Buddy_Resize2D_Bilinear_Interpolation/1 0.266 ms 0.266 ms 2633 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 103033 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 48825 -Buddy_Erosion2D_Constant_Padding/1 0.223 ms 0.223 ms 3139 -Buddy_Dilation2D_Constant_Padding/1 0.221 ms 0.221 ms 3065 -Buddy_Opening2D_Constant_Padding/1 0.360 ms 0.360 ms 1920 -Buddy_Closing2D_Constant_Padding/1 0.364 ms 0.364 ms 1938 -Buddy_TopHat2D_Constant_Padding/1 0.897 ms 0.896 ms 725 -Buddy_BottomHat2D_Constant_Padding/1 0.919 ms 0.919 ms 739 -OpenCV_Erode2D_Constant_Padding/1 0.154 ms 0.154 ms 4653 -OpenCV_Opening2D_Constant_Padding/1 0.223 ms 0.223 ms 3211 -OpenCV_Closing2D_Constant_Padding/1 0.221 ms 0.221 ms 3106 -OpenCV_TopHat2D_Constant_Padding/1 0.257 ms 0.257 ms 2694 -OpenCV_BottomHat2D_Constant_Padding/1 0.256 ms 0.256 ms 2735 -OpenCV_MorphGrad2D_Constant_Padding/1 0.256 ms 0.256 ms 2736 -OpenCV_Dilate2D_Constant_Padding/1 0.140 ms 0.140 ms 4980 +[Success] … Running: ../benchmarks/ImageProcessing/Images/YuTu.png laplacianKernelAlign random3x3KernelAlignInt REPLICATE_PADDING -2025-05-26T22:18:02+00:00 -Running ./bin/image-processing-benchmark -Run on (24 X 5100 MHz CPU s) -CPU Caches: - L1 Data 48 KiB (x12) - L1 Instruction 32 KiB (x12) - L2 Unified 1280 KiB (x12) - L3 Unified 30720 KiB (x1) -Load Average: 2.49, 10.48, 9.50 -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
--------------------------------------------------------------------------------------------- -Benchmark Time CPU Iterations --------------------------------------------------------------------------------------------- -Eigen_Convolve2D/1 5.01 ms 5.01 ms 140 -MLIR_Conv2D/1 7.39 ms 7.39 ms 95 -Buddy_Conv2D/1 0.702 ms 0.702 ms 962 -Buddy_Corr2D_Constant_Padding/1 1.07 ms 1.07 ms 652 -OpenCV_Filter2D_Constant_Padding/1 1.91 ms 1.91 ms 366 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.146 ms 0.146 ms 4698 -Buddy_Resize2D_Bilinear_Interpolation/1 0.267 ms 0.267 ms 2627 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 102351 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 48814 -Buddy_Erosion2D_Constant_Padding/1 0.222 ms 0.222 ms 3146 -Buddy_Dilation2D_Constant_Padding/1 0.221 ms 0.221 ms 3150 -Buddy_Opening2D_Constant_Padding/1 0.330 ms 0.329 ms 2128 -Buddy_Closing2D_Constant_Padding/1 0.332 ms 0.332 ms 2167 -Buddy_TopHat2D_Constant_Padding/1 0.866 ms 0.866 ms 747 -Buddy_BottomHat2D_Constant_Padding/1 0.875 ms 0.875 ms 769 -OpenCV_Erode2D_Constant_Padding/1 0.139 ms 0.139 ms 5009 -OpenCV_Opening2D_Constant_Padding/1 0.216 ms 0.216 ms 3227 -OpenCV_Closing2D_Constant_Padding/1 0.225 ms 0.225 ms 3120 -OpenCV_TopHat2D_Constant_Padding/1 0.258 ms 0.258 ms 2745 -OpenCV_BottomHat2D_Constant_Padding/1 0.255 ms 0.255 ms 2765 -OpenCV_MorphGrad2D_Constant_Padding/1 0.256 ms 0.256 ms 2733 -OpenCV_Dilate2D_Constant_Padding/1 0.141 ms 0.141 ms 4956 +[Success] … Running: ../benchmarks/ImageProcessing/Images/YuTu.png logKernelAlign random3x3KernelAlignInt CONSTANT_PADDING -2025-05-26T22:18:25+00:00 -Running ./bin/image-processing-benchmark -Run on (24 X 5100 MHz CPU s) -CPU Caches: - L1 Data 48 KiB (x12) - L1 Instruction 32 KiB (x12) - L2 Unified 1280 KiB (x12) - L3 Unified 30720 KiB (x1) -Load Average: 2.26, 9.78, 9.30 -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
--------------------------------------------------------------------------------------------- -Benchmark Time CPU Iterations --------------------------------------------------------------------------------------------- -Eigen_Convolve2D/1 11.9 ms 11.9 ms 60 -MLIR_Conv2D/1 29.8 ms 29.8 ms 23 -Buddy_Conv2D/1 2.09 ms 2.09 ms 332 -Buddy_Corr2D_Constant_Padding/1 1.80 ms 1.80 ms 390 -OpenCV_Filter2D_Constant_Padding/1 2.74 ms 2.74 ms 256 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.146 ms 0.146 ms 4768 -Buddy_Resize2D_Bilinear_Interpolation/1 0.266 ms 0.266 ms 2629 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 103262 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 48878 -Buddy_Erosion2D_Constant_Padding/1 0.224 ms 0.224 ms 3105 -Buddy_Dilation2D_Constant_Padding/1 0.223 ms 0.223 ms 3120 -Buddy_Opening2D_Constant_Padding/1 0.352 ms 0.352 ms 1986 -Buddy_Closing2D_Constant_Padding/1 0.348 ms 0.348 ms 1944 -Buddy_TopHat2D_Constant_Padding/1 0.914 ms 0.914 ms 747 -Buddy_BottomHat2D_Constant_Padding/1 0.899 ms 0.899 ms 739 -OpenCV_Erode2D_Constant_Padding/1 0.141 ms 0.141 ms 4963 -OpenCV_Opening2D_Constant_Padding/1 0.223 ms 0.223 ms 3142 -OpenCV_Closing2D_Constant_Padding/1 0.226 ms 0.226 ms 3089 -OpenCV_TopHat2D_Constant_Padding/1 0.261 ms 0.261 ms 2689 -OpenCV_BottomHat2D_Constant_Padding/1 0.262 ms 0.262 ms 2693 -OpenCV_MorphGrad2D_Constant_Padding/1 0.263 ms 0.263 ms 2664 -OpenCV_Dilate2D_Constant_Padding/1 0.139 ms 0.139 ms 5045 +[Success] … Running: ../benchmarks/ImageProcessing/Images/YuTu.png logKernelAlign random3x3KernelAlignInt REPLICATE_PADDING -2025-05-26T22:18:50+00:00 -Running ./bin/image-processing-benchmark -Run on (24 X 5100 MHz CPU s) -CPU Caches: - L1 Data 48 KiB (x12) - L1 Instruction 32 KiB (x12) - L2 Unified 1280 KiB (x12) - L3 Unified 30720 KiB (x1) -Load Average: 2.17, 9.16, 9.10 -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
--------------------------------------------------------------------------------------------- -Benchmark Time CPU Iterations --------------------------------------------------------------------------------------------- -Eigen_Convolve2D/1 11.6 ms 11.6 ms 60 -MLIR_Conv2D/1 29.9 ms 29.9 ms 23 -Buddy_Conv2D/1 2.13 ms 2.13 ms 327 -Buddy_Corr2D_Constant_Padding/1 1.80 ms 1.80 ms 389 -OpenCV_Filter2D_Constant_Padding/1 2.74 ms 2.74 ms 255 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.146 ms 0.146 ms 4755 -Buddy_Resize2D_Bilinear_Interpolation/1 0.266 ms 0.266 ms 2633 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 102709 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 48854 -Buddy_Erosion2D_Constant_Padding/1 0.231 ms 0.231 ms 2456 -Buddy_Dilation2D_Constant_Padding/1 0.226 ms 0.226 ms 3097 -Buddy_Opening2D_Constant_Padding/1 0.352 ms 0.352 ms 2015 -Buddy_Closing2D_Constant_Padding/1 0.346 ms 0.346 ms 2002 -Buddy_TopHat2D_Constant_Padding/1 0.924 ms 0.924 ms 721 -Buddy_BottomHat2D_Constant_Padding/1 0.923 ms 0.923 ms 727 -OpenCV_Erode2D_Constant_Padding/1 0.140 ms 0.140 ms 4988 -OpenCV_Opening2D_Constant_Padding/1 0.222 ms 0.222 ms 3142 -OpenCV_Closing2D_Constant_Padding/1 0.221 ms 0.221 ms 3180 -OpenCV_TopHat2D_Constant_Padding/1 0.265 ms 0.265 ms 2711 -OpenCV_BottomHat2D_Constant_Padding/1 0.261 ms 0.261 ms 2642 -OpenCV_MorphGrad2D_Constant_Padding/1 0.259 ms 0.259 ms 2712 -OpenCV_Dilate2D_Constant_Padding/1 0.138 ms 0.138 ms 5064 +[Success] … Testing AVX2 support AVX2 is supported. 
Running image-processing-benchmark for AVX2 Running: ../benchmarks/ImageProcessing/Images/YuTu.png prewittKernelAlign random3x3KernelAlignInt CONSTANT_PADDING -2025-05-26T22:19:15+00:00 -Running ./bin/image-processing-benchmark -Run on (24 X 5100 MHz CPU s) -CPU Caches: - L1 Data 48 KiB (x12) - L1 Instruction 32 KiB (x12) - L2 Unified 1280 KiB (x12) - L3 Unified 30720 KiB (x1) -Load Average: 2.11, 8.58, 8.91 -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. --------------------------------------------------------------------------------------------- -Benchmark Time CPU Iterations --------------------------------------------------------------------------------------------- -Eigen_Convolve2D/1 4.87 ms 4.87 ms 144 -MLIR_Conv2D/1 7.37 ms 7.37 ms 95 -Buddy_Conv2D/1 0.315 ms 0.315 ms 2234 -Buddy_Corr2D_Constant_Padding/1 0.824 ms 0.824 ms 850 -OpenCV_Filter2D_Constant_Padding/1 1.28 ms 1.28 ms 546 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.146 ms 0.146 ms 4747 -Buddy_Resize2D_Bilinear_Interpolation/1 0.266 ms 0.266 ms 2629 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 102542 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 48907 -Buddy_Erosion2D_Constant_Padding/1 0.224 ms 0.224 ms 3132 -Buddy_Dilation2D_Constant_Padding/1 0.222 ms 0.222 ms 3121 -Buddy_Opening2D_Constant_Padding/1 0.338 ms 0.338 ms 2059 -Buddy_Closing2D_Constant_Padding/1 0.344 ms 0.344 ms 2107 -Buddy_TopHat2D_Constant_Padding/1 0.904 ms 0.904 ms 725 -Buddy_BottomHat2D_Constant_Padding/1 0.911 ms 0.911 ms 744 -OpenCV_Erode2D_Constant_Padding/1 0.138 ms 0.138 ms 5040 -OpenCV_Opening2D_Constant_Padding/1 0.242 ms 0.242 ms 3222 -OpenCV_Closing2D_Constant_Padding/1 0.222 ms 0.222 ms 3151 -OpenCV_TopHat2D_Constant_Padding/1 0.255 ms 0.255 ms 2758 -OpenCV_BottomHat2D_Constant_Padding/1 0.260 ms 0.260 ms 2688 -OpenCV_MorphGrad2D_Constant_Padding/1 0.256 ms 0.256 ms 2740 
-OpenCV_Dilate2D_Constant_Padding/1 0.140 ms 0.140 ms 4990 +[Success] … Running: ../benchmarks/ImageProcessing/Images/YuTu.png prewittKernelAlign random3x3KernelAlignInt REPLICATE_PADDING -2025-05-26T22:19:39+00:00 -Running ./bin/image-processing-benchmark -Run on (24 X 5100 MHz CPU s) -CPU Caches: - L1 Data 48 KiB (x12) - L1 Instruction 32 KiB (x12) - L2 Unified 1280 KiB (x12) - L3 Unified 30720 KiB (x1) -Load Average: 2.07, 8.05, 8.73 -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. --------------------------------------------------------------------------------------------- -Benchmark Time CPU Iterations --------------------------------------------------------------------------------------------- -Eigen_Convolve2D/1 5.01 ms 5.01 ms 140 -MLIR_Conv2D/1 7.36 ms 7.36 ms 95 -Buddy_Conv2D/1 0.313 ms 0.313 ms 2233 -Buddy_Corr2D_Constant_Padding/1 0.813 ms 0.813 ms 856 -OpenCV_Filter2D_Constant_Padding/1 1.28 ms 1.28 ms 547 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.146 ms 0.146 ms 4751 -Buddy_Resize2D_Bilinear_Interpolation/1 0.266 ms 0.266 ms 2631 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 103135 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 48881 -Buddy_Erosion2D_Constant_Padding/1 0.220 ms 0.220 ms 3158 -Buddy_Dilation2D_Constant_Padding/1 0.225 ms 0.225 ms 3109 -Buddy_Opening2D_Constant_Padding/1 0.340 ms 0.340 ms 2007 -Buddy_Closing2D_Constant_Padding/1 0.341 ms 0.341 ms 2062 -Buddy_TopHat2D_Constant_Padding/1 0.904 ms 0.904 ms 732 -Buddy_BottomHat2D_Constant_Padding/1 0.907 ms 0.907 ms 731 -OpenCV_Erode2D_Constant_Padding/1 0.138 ms 0.138 ms 5071 -OpenCV_Opening2D_Constant_Padding/1 0.219 ms 0.219 ms 3192 -OpenCV_Closing2D_Constant_Padding/1 0.217 ms 0.217 ms 3221 -OpenCV_TopHat2D_Constant_Padding/1 0.252 ms 0.252 ms 2767 -OpenCV_BottomHat2D_Constant_Padding/1 0.254 ms 0.254 ms 2752 -OpenCV_MorphGrad2D_Constant_Padding/1 0.259 ms 0.259 ms 2708 
-OpenCV_Dilate2D_Constant_Padding/1 0.143 ms 0.143 ms 4910 +[Success] … Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel3x3KernelAlign random3x3KernelAlignInt CONSTANT_PADDING -2025-05-26T22:20:03+00:00 -Running ./bin/image-processing-benchmark -Run on (24 X 5100 MHz CPU s) -CPU Caches: - L1 Data 48 KiB (x12) - L1 Instruction 32 KiB (x12) - L2 Unified 1280 KiB (x12) - L3 Unified 30720 KiB (x1) -Load Average: 2.05, 7.66, 8.58 -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. --------------------------------------------------------------------------------------------- -Benchmark Time CPU Iterations --------------------------------------------------------------------------------------------- -Eigen_Convolve2D/1 5.00 ms 5.00 ms 139 -MLIR_Conv2D/1 7.38 ms 7.38 ms 95 -Buddy_Conv2D/1 0.313 ms 0.313 ms 2239 -Buddy_Corr2D_Constant_Padding/1 0.815 ms 0.815 ms 863 -OpenCV_Filter2D_Constant_Padding/1 1.28 ms 1.28 ms 548 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.146 ms 0.146 ms 4797 -Buddy_Resize2D_Bilinear_Interpolation/1 0.266 ms 0.266 ms 2637 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 103585 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 48856 -Buddy_Erosion2D_Constant_Padding/1 0.223 ms 0.223 ms 3141 -Buddy_Dilation2D_Constant_Padding/1 0.221 ms 0.221 ms 3162 -Buddy_Opening2D_Constant_Padding/1 0.336 ms 0.336 ms 2082 -Buddy_Closing2D_Constant_Padding/1 0.328 ms 0.328 ms 2093 -Buddy_TopHat2D_Constant_Padding/1 0.917 ms 0.917 ms 727 -Buddy_BottomHat2D_Constant_Padding/1 0.915 ms 0.915 ms 732 -OpenCV_Erode2D_Constant_Padding/1 0.140 ms 0.140 ms 5002 -OpenCV_Opening2D_Constant_Padding/1 0.225 ms 0.225 ms 3104 -OpenCV_Closing2D_Constant_Padding/1 0.223 ms 0.223 ms 3100 -OpenCV_TopHat2D_Constant_Padding/1 0.254 ms 0.254 ms 2764 -OpenCV_BottomHat2D_Constant_Padding/1 0.260 ms 0.260 ms 2700 -OpenCV_MorphGrad2D_Constant_Padding/1 0.256 ms 0.256 ms 2725 
-OpenCV_Dilate2D_Constant_Padding/1 0.140 ms 0.140 ms 4989 +[Success] … Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel3x3KernelAlign random3x3KernelAlignInt REPLICATE_PADDING -2025-05-26T22:20:27+00:00 -Running ./bin/image-processing-benchmark -Run on (24 X 5100 MHz CPU s) -CPU Caches: - L1 Data 48 KiB (x12) - L1 Instruction 32 KiB (x12) - L2 Unified 1280 KiB (x12) - L3 Unified 30720 KiB (x1) -Load Average: 2.09, 7.22, 8.41 -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. --------------------------------------------------------------------------------------------- -Benchmark Time CPU Iterations --------------------------------------------------------------------------------------------- -Eigen_Convolve2D/1 5.00 ms 5.00 ms 140 -MLIR_Conv2D/1 7.37 ms 7.37 ms 95 -Buddy_Conv2D/1 0.313 ms 0.313 ms 2227 -Buddy_Corr2D_Constant_Padding/1 0.815 ms 0.815 ms 857 -OpenCV_Filter2D_Constant_Padding/1 1.28 ms 1.28 ms 548 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.145 ms 0.145 ms 4786 -Buddy_Resize2D_Bilinear_Interpolation/1 0.267 ms 0.267 ms 2621 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 103139 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 48916 -Buddy_Erosion2D_Constant_Padding/1 0.228 ms 0.228 ms 3055 -Buddy_Dilation2D_Constant_Padding/1 0.264 ms 0.264 ms 3073 -Buddy_Opening2D_Constant_Padding/1 0.388 ms 0.388 ms 1855 -Buddy_Closing2D_Constant_Padding/1 0.374 ms 0.374 ms 2033 -Buddy_TopHat2D_Constant_Padding/1 0.911 ms 0.911 ms 727 -Buddy_BottomHat2D_Constant_Padding/1 0.897 ms 0.897 ms 732 -OpenCV_Erode2D_Constant_Padding/1 0.139 ms 0.139 ms 5021 -OpenCV_Opening2D_Constant_Padding/1 0.224 ms 0.224 ms 3154 -OpenCV_Closing2D_Constant_Padding/1 0.220 ms 0.220 ms 3178 -OpenCV_TopHat2D_Constant_Padding/1 0.254 ms 0.254 ms 2759 -OpenCV_BottomHat2D_Constant_Padding/1 0.257 ms 0.257 ms 2757 -OpenCV_MorphGrad2D_Constant_Padding/1 0.255 ms 0.255 ms 2734 
-OpenCV_Dilate2D_Constant_Padding/1 0.139 ms 0.139 ms 5010 +[Success] … Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel5x5KernelAlign random3x3KernelAlignInt CONSTANT_PADDING -2025-05-26T22:20:51+00:00 -Running ./bin/image-processing-benchmark -Run on (24 X 5100 MHz CPU s) -CPU Caches: - L1 Data 48 KiB (x12) - L1 Instruction 32 KiB (x12) - L2 Unified 1280 KiB (x12) - L3 Unified 30720 KiB (x1) -Load Average: 2.06, 6.80, 8.24 -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. --------------------------------------------------------------------------------------------- -Benchmark Time CPU Iterations --------------------------------------------------------------------------------------------- -Eigen_Convolve2D/1 11.8 ms 11.8 ms 60 -MLIR_Conv2D/1 29.9 ms 29.9 ms 23 -Buddy_Conv2D/1 1.30 ms 1.30 ms 524 -Buddy_Corr2D_Constant_Padding/1 2.37 ms 2.37 ms 294 -OpenCV_Filter2D_Constant_Padding/1 4.20 ms 4.20 ms 166 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.146 ms 0.146 ms 4766 -Buddy_Resize2D_Bilinear_Interpolation/1 0.266 ms 0.266 ms 2632 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 102992 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 48914 -Buddy_Erosion2D_Constant_Padding/1 0.223 ms 0.223 ms 3110 -Buddy_Dilation2D_Constant_Padding/1 0.226 ms 0.226 ms 3094 -Buddy_Opening2D_Constant_Padding/1 0.354 ms 0.354 ms 1992 -Buddy_Closing2D_Constant_Padding/1 0.348 ms 0.348 ms 1972 -Buddy_TopHat2D_Constant_Padding/1 0.907 ms 0.907 ms 731 -Buddy_BottomHat2D_Constant_Padding/1 0.905 ms 0.905 ms 740 -OpenCV_Erode2D_Constant_Padding/1 0.138 ms 0.138 ms 5054 -OpenCV_Opening2D_Constant_Padding/1 0.225 ms 0.225 ms 3141 -OpenCV_Closing2D_Constant_Padding/1 0.224 ms 0.224 ms 3119 -OpenCV_TopHat2D_Constant_Padding/1 0.289 ms 0.289 ms 2692 -OpenCV_BottomHat2D_Constant_Padding/1 0.261 ms 0.261 ms 2695 -OpenCV_MorphGrad2D_Constant_Padding/1 0.260 ms 0.260 ms 2680 
-OpenCV_Dilate2D_Constant_Padding/1 0.139 ms 0.139 ms 5013 +[Success] … Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel5x5KernelAlign random3x3KernelAlignInt REPLICATE_PADDING -2025-05-26T22:21:15+00:00 -Running ./bin/image-processing-benchmark -Run on (24 X 5100 MHz CPU s) -CPU Caches: - L1 Data 48 KiB (x12) - L1 Instruction 32 KiB (x12) - L2 Unified 1280 KiB (x12) - L3 Unified 30720 KiB (x1) -Load Average: 2.04, 6.41, 8.07 -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. --------------------------------------------------------------------------------------------- -Benchmark Time CPU Iterations --------------------------------------------------------------------------------------------- -Eigen_Convolve2D/1 11.8 ms 11.8 ms 59 -MLIR_Conv2D/1 29.9 ms 29.8 ms 24 -Buddy_Conv2D/1 1.31 ms 1.31 ms 542 -Buddy_Corr2D_Constant_Padding/1 2.38 ms 2.38 ms 293 -OpenCV_Filter2D_Constant_Padding/1 4.20 ms 4.20 ms 166 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.146 ms 0.146 ms 4777 -Buddy_Resize2D_Bilinear_Interpolation/1 0.266 ms 0.266 ms 2635 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 102983 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 48881 -Buddy_Erosion2D_Constant_Padding/1 0.221 ms 0.221 ms 3156 -Buddy_Dilation2D_Constant_Padding/1 0.222 ms 0.222 ms 3159 -Buddy_Opening2D_Constant_Padding/1 0.330 ms 0.330 ms 2113 -Buddy_Closing2D_Constant_Padding/1 0.334 ms 0.334 ms 2087 -Buddy_TopHat2D_Constant_Padding/1 0.877 ms 0.877 ms 736 -Buddy_BottomHat2D_Constant_Padding/1 0.891 ms 0.891 ms 740 -OpenCV_Erode2D_Constant_Padding/1 0.139 ms 0.139 ms 5026 -OpenCV_Opening2D_Constant_Padding/1 0.226 ms 0.226 ms 3102 -OpenCV_Closing2D_Constant_Padding/1 0.227 ms 0.227 ms 3074 -OpenCV_TopHat2D_Constant_Padding/1 0.258 ms 0.258 ms 2684 -OpenCV_BottomHat2D_Constant_Padding/1 0.263 ms 0.263 ms 2660 -OpenCV_MorphGrad2D_Constant_Padding/1 0.261 ms 0.261 ms 2685 
-OpenCV_Dilate2D_Constant_Padding/1 0.140 ms 0.140 ms 4970 +[Success] … Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel7x7KernelAlign random3x3KernelAlignInt CONSTANT_PADDING -2025-05-26T22:21:40+00:00 -Running ./bin/image-processing-benchmark -Run on (24 X 5100 MHz CPU s) -CPU Caches: - L1 Data 48 KiB (x12) - L1 Instruction 32 KiB (x12) - L2 Unified 1280 KiB (x12) - L3 Unified 30720 KiB (x1) -Load Average: 2.02, 6.06, 7.91 -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. --------------------------------------------------------------------------------------------- -Benchmark Time CPU Iterations --------------------------------------------------------------------------------------------- -Eigen_Convolve2D/1 22.2 ms 22.2 ms 31 -MLIR_Conv2D/1 68.1 ms 68.1 ms 10 -Buddy_Conv2D/1 2.32 ms 2.32 ms 300 -Buddy_Corr2D_Constant_Padding/1 4.70 ms 4.70 ms 148 -OpenCV_Filter2D_Constant_Padding/1 8.80 ms 8.80 ms 79 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.146 ms 0.146 ms 4765 -Buddy_Resize2D_Bilinear_Interpolation/1 0.266 ms 0.266 ms 2606 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 103232 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 48866 -Buddy_Erosion2D_Constant_Padding/1 0.223 ms 0.223 ms 3108 -Buddy_Dilation2D_Constant_Padding/1 0.222 ms 0.222 ms 3124 -Buddy_Opening2D_Constant_Padding/1 0.330 ms 0.330 ms 1961 -Buddy_Closing2D_Constant_Padding/1 0.350 ms 0.350 ms 2094 -Buddy_TopHat2D_Constant_Padding/1 0.899 ms 0.899 ms 764 -Buddy_BottomHat2D_Constant_Padding/1 0.881 ms 0.881 ms 784 -OpenCV_Erode2D_Constant_Padding/1 0.140 ms 0.140 ms 4984 -OpenCV_Opening2D_Constant_Padding/1 0.225 ms 0.225 ms 3103 -OpenCV_Closing2D_Constant_Padding/1 0.224 ms 0.224 ms 3089 -OpenCV_TopHat2D_Constant_Padding/1 0.257 ms 0.257 ms 2707 -OpenCV_BottomHat2D_Constant_Padding/1 0.258 ms 0.258 ms 2743 -OpenCV_MorphGrad2D_Constant_Padding/1 0.261 ms 0.261 ms 2695 
-OpenCV_Dilate2D_Constant_Padding/1 0.140 ms 0.140 ms 4997 +[Success] … Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel7x7KernelAlign random3x3KernelAlignInt REPLICATE_PADDING -2025-05-26T22:22:04+00:00 -Running ./bin/image-processing-benchmark -Run on (24 X 5100 MHz CPU s) -CPU Caches: - L1 Data 48 KiB (x12) - L1 Instruction 32 KiB (x12) - L2 Unified 1280 KiB (x12) - L3 Unified 30720 KiB (x1) -Load Average: 2.01, 5.73, 7.75 -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. --------------------------------------------------------------------------------------------- -Benchmark Time CPU Iterations --------------------------------------------------------------------------------------------- -Eigen_Convolve2D/1 22.0 ms 22.0 ms 32 -MLIR_Conv2D/1 68.1 ms 68.1 ms 10 -Buddy_Conv2D/1 2.22 ms 2.22 ms 304 -Buddy_Corr2D_Constant_Padding/1 4.70 ms 4.70 ms 149 -OpenCV_Filter2D_Constant_Padding/1 8.80 ms 8.80 ms 79 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.146 ms 0.146 ms 4719 -Buddy_Resize2D_Bilinear_Interpolation/1 0.266 ms 0.266 ms 2629 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 102880 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 48902 -Buddy_Erosion2D_Constant_Padding/1 0.222 ms 0.222 ms 3148 -Buddy_Dilation2D_Constant_Padding/1 0.223 ms 0.223 ms 3112 -Buddy_Opening2D_Constant_Padding/1 0.401 ms 0.401 ms 1703 -Buddy_Closing2D_Constant_Padding/1 0.370 ms 0.370 ms 1933 -Buddy_TopHat2D_Constant_Padding/1 0.940 ms 0.940 ms 724 -Buddy_BottomHat2D_Constant_Padding/1 0.904 ms 0.904 ms 704 -OpenCV_Erode2D_Constant_Padding/1 0.139 ms 0.139 ms 5045 -OpenCV_Opening2D_Constant_Padding/1 0.228 ms 0.228 ms 3039 -OpenCV_Closing2D_Constant_Padding/1 0.228 ms 0.228 ms 3127 -OpenCV_TopHat2D_Constant_Padding/1 0.261 ms 0.261 ms 2665 -OpenCV_BottomHat2D_Constant_Padding/1 0.261 ms 0.261 ms 2664 -OpenCV_MorphGrad2D_Constant_Padding/1 0.262 ms 0.262 ms 2666 
-OpenCV_Dilate2D_Constant_Padding/1 0.141 ms 0.141 ms 4964 +[Success] … Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel9x9KernelAlign random3x3KernelAlignInt CONSTANT_PADDING -2025-05-26T22:22:28+00:00 -Running ./bin/image-processing-benchmark -Run on (24 X 5100 MHz CPU s) -CPU Caches: - L1 Data 48 KiB (x12) - L1 Instruction 32 KiB (x12) - L2 Unified 1280 KiB (x12) - L3 Unified 30720 KiB (x1) -Load Average: 2.01, 5.49, 7.63 -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. --------------------------------------------------------------------------------------------- -Benchmark Time CPU Iterations --------------------------------------------------------------------------------------------- -Eigen_Convolve2D/1 35.2 ms 35.2 ms 20 -MLIR_Conv2D/1 122 ms 122 ms 6 -Buddy_Conv2D/1 4.21 ms 4.21 ms 167 -Buddy_Corr2D_Constant_Padding/1 7.90 ms 7.90 ms 89 -OpenCV_Filter2D_Constant_Padding/1 5.92 ms 5.92 ms 118 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.146 ms 0.146 ms 4760 -Buddy_Resize2D_Bilinear_Interpolation/1 0.267 ms 0.266 ms 2636 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 103065 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 47744 -Buddy_Erosion2D_Constant_Padding/1 0.220 ms 0.220 ms 3166 -Buddy_Dilation2D_Constant_Padding/1 0.220 ms 0.220 ms 3164 -Buddy_Opening2D_Constant_Padding/1 0.329 ms 0.329 ms 2083 -Buddy_Closing2D_Constant_Padding/1 0.331 ms 0.331 ms 2139 -Buddy_TopHat2D_Constant_Padding/1 0.877 ms 0.877 ms 739 -Buddy_BottomHat2D_Constant_Padding/1 0.863 ms 0.863 ms 755 -OpenCV_Erode2D_Constant_Padding/1 0.139 ms 0.139 ms 5029 -OpenCV_Opening2D_Constant_Padding/1 0.220 ms 0.220 ms 3140 -OpenCV_Closing2D_Constant_Padding/1 0.222 ms 0.222 ms 3204 -OpenCV_TopHat2D_Constant_Padding/1 0.255 ms 0.255 ms 2744 -OpenCV_BottomHat2D_Constant_Padding/1 0.254 ms 0.254 ms 2737 -OpenCV_MorphGrad2D_Constant_Padding/1 0.255 ms 0.255 ms 2177 
-OpenCV_Dilate2D_Constant_Padding/1 0.137 ms 0.137 ms 5097 +[Success] … Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel9x9KernelAlign random3x3KernelAlignInt REPLICATE_PADDING -2025-05-26T22:22:53+00:00 -Running ./bin/image-processing-benchmark -Run on (24 X 5100 MHz CPU s) -CPU Caches: - L1 Data 48 KiB (x12) - L1 Instruction 32 KiB (x12) - L2 Unified 1280 KiB (x12) - L3 Unified 30720 KiB (x1) -Load Average: 2.01, 5.21, 7.48 -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. --------------------------------------------------------------------------------------------- -Benchmark Time CPU Iterations --------------------------------------------------------------------------------------------- -Eigen_Convolve2D/1 34.8 ms 34.8 ms 20 -MLIR_Conv2D/1 122 ms 122 ms 6 -Buddy_Conv2D/1 3.87 ms 3.87 ms 179 -Buddy_Corr2D_Constant_Padding/1 7.89 ms 7.89 ms 89 -OpenCV_Filter2D_Constant_Padding/1 5.91 ms 5.91 ms 118 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.146 ms 0.146 ms 4765 -Buddy_Resize2D_Bilinear_Interpolation/1 0.266 ms 0.266 ms 2629 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 102844 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 48492 -Buddy_Erosion2D_Constant_Padding/1 0.222 ms 0.222 ms 3127 -Buddy_Dilation2D_Constant_Padding/1 0.221 ms 0.221 ms 3155 -Buddy_Opening2D_Constant_Padding/1 0.349 ms 0.349 ms 2000 -Buddy_Closing2D_Constant_Padding/1 0.355 ms 0.355 ms 2036 -Buddy_TopHat2D_Constant_Padding/1 0.910 ms 0.910 ms 751 -Buddy_BottomHat2D_Constant_Padding/1 0.912 ms 0.912 ms 755 -OpenCV_Erode2D_Constant_Padding/1 0.140 ms 0.140 ms 5001 -OpenCV_Opening2D_Constant_Padding/1 0.220 ms 0.220 ms 3193 -OpenCV_Closing2D_Constant_Padding/1 0.219 ms 0.219 ms 3193 -OpenCV_TopHat2D_Constant_Padding/1 0.256 ms 0.256 ms 2734 -OpenCV_BottomHat2D_Constant_Padding/1 0.255 ms 0.255 ms 2725 -OpenCV_MorphGrad2D_Constant_Padding/1 0.256 ms 0.256 ms 2723 
-OpenCV_Dilate2D_Constant_Padding/1 0.138 ms 0.138 ms 5067 +[Success] … Running: ../benchmarks/ImageProcessing/Images/YuTu.png laplacianKernelAlign random3x3KernelAlignInt CONSTANT_PADDING -2025-05-26T22:23:17+00:00 -Running ./bin/image-processing-benchmark -Run on (24 X 5100 MHz CPU s) -CPU Caches: - L1 Data 48 KiB (x12) - L1 Instruction 32 KiB (x12) - L2 Unified 1280 KiB (x12) - L3 Unified 30720 KiB (x1) -Load Average: 2.00, 4.95, 7.33 -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. --------------------------------------------------------------------------------------------- -Benchmark Time CPU Iterations --------------------------------------------------------------------------------------------- -Eigen_Convolve2D/1 4.86 ms 4.86 ms 144 -MLIR_Conv2D/1 7.37 ms 7.37 ms 95 -Buddy_Conv2D/1 0.420 ms 0.420 ms 1663 -Buddy_Corr2D_Constant_Padding/1 1.08 ms 1.08 ms 651 -OpenCV_Filter2D_Constant_Padding/1 1.90 ms 1.90 ms 367 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.146 ms 0.146 ms 4748 -Buddy_Resize2D_Bilinear_Interpolation/1 0.266 ms 0.266 ms 2632 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 102984 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 48927 -Buddy_Erosion2D_Constant_Padding/1 0.224 ms 0.224 ms 3155 -Buddy_Dilation2D_Constant_Padding/1 0.222 ms 0.222 ms 3149 -Buddy_Opening2D_Constant_Padding/1 0.360 ms 0.360 ms 2068 -Buddy_Closing2D_Constant_Padding/1 0.356 ms 0.356 ms 1957 -Buddy_TopHat2D_Constant_Padding/1 0.936 ms 0.936 ms 731 -Buddy_BottomHat2D_Constant_Padding/1 0.919 ms 0.919 ms 729 -OpenCV_Erode2D_Constant_Padding/1 0.139 ms 0.139 ms 5005 -OpenCV_Opening2D_Constant_Padding/1 0.228 ms 0.228 ms 3076 -OpenCV_Closing2D_Constant_Padding/1 0.223 ms 0.223 ms 3193 -OpenCV_TopHat2D_Constant_Padding/1 0.255 ms 0.255 ms 2760 -OpenCV_BottomHat2D_Constant_Padding/1 0.256 ms 0.256 ms 2736 -OpenCV_MorphGrad2D_Constant_Padding/1 0.256 ms 0.256 ms 2716 
-OpenCV_Dilate2D_Constant_Padding/1 0.141 ms 0.141 ms 4963 +[Success] … Running: ../benchmarks/ImageProcessing/Images/YuTu.png laplacianKernelAlign random3x3KernelAlignInt REPLICATE_PADDING -2025-05-26T22:23:41+00:00 -Running ./bin/image-processing-benchmark -Run on (24 X 5100 MHz CPU s) -CPU Caches: - L1 Data 48 KiB (x12) - L1 Instruction 32 KiB (x12) - L2 Unified 1280 KiB (x12) - L3 Unified 30720 KiB (x1) -Load Average: 2.00, 4.71, 7.19 -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. --------------------------------------------------------------------------------------------- -Benchmark Time CPU Iterations --------------------------------------------------------------------------------------------- -Eigen_Convolve2D/1 4.87 ms 4.87 ms 143 -MLIR_Conv2D/1 7.37 ms 7.37 ms 95 -Buddy_Conv2D/1 0.425 ms 0.425 ms 1657 -corrupted double-linked list +[Success] … Running: ../benchmarks/ImageProcessing/Images/YuTu.png logKernelAlign random3x3KernelAlignInt CONSTANT_PADDING -2025-05-26T22:23:45+00:00 -Running ./bin/image-processing-benchmark -Run on (24 X 5100 MHz CPU s) -CPU Caches: - L1 Data 48 KiB (x12) - L1 Instruction 32 KiB (x12) - L2 Unified 1280 KiB (x12) - L3 Unified 30720 KiB (x1) -Load Average: 2.00, 4.66, 7.16 -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
--------------------------------------------------------------------------------------------- -Benchmark Time CPU Iterations --------------------------------------------------------------------------------------------- -Eigen_Convolve2D/1 11.8 ms 11.8 ms 59 -MLIR_Conv2D/1 29.9 ms 29.9 ms 24 -Buddy_Conv2D/1 1.02 ms 1.02 ms 751 -Buddy_Corr2D_Constant_Padding/1 1.79 ms 1.79 ms 391 -OpenCV_Filter2D_Constant_Padding/1 2.74 ms 2.74 ms 255 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.146 ms 0.146 ms 4778 -Buddy_Resize2D_Bilinear_Interpolation/1 0.266 ms 0.266 ms 2630 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 103033 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.016 ms 0.016 ms 48860 -Buddy_Erosion2D_Constant_Padding/1 0.225 ms 0.225 ms 3134 -Buddy_Dilation2D_Constant_Padding/1 0.226 ms 0.226 ms 2971 -Buddy_Opening2D_Constant_Padding/1 0.347 ms 0.347 ms 1881 -Buddy_Closing2D_Constant_Padding/1 0.348 ms 0.348 ms 2009 -Buddy_TopHat2D_Constant_Padding/1 0.927 ms 0.927 ms 714 -Buddy_BottomHat2D_Constant_Padding/1 0.931 ms 0.931 ms 723 -OpenCV_Erode2D_Constant_Padding/1 0.141 ms 0.141 ms 4965 -OpenCV_Opening2D_Constant_Padding/1 0.222 ms 0.222 ms 3088 -OpenCV_Closing2D_Constant_Padding/1 0.224 ms 0.224 ms 3129 -OpenCV_TopHat2D_Constant_Padding/1 0.264 ms 0.264 ms 2625 -OpenCV_BottomHat2D_Constant_Padding/1 0.269 ms 0.269 ms 2625 -OpenCV_MorphGrad2D_Constant_Padding/1 0.260 ms 0.260 ms 2692 -OpenCV_Dilate2D_Constant_Padding/1 0.138 ms 0.138 ms 5053 +[Success] … Running: ../benchmarks/ImageProcessing/Images/YuTu.png logKernelAlign random3x3KernelAlignInt REPLICATE_PADDING -2025-05-26T22:24:09+00:00 -Running ./bin/image-processing-benchmark -Run on (24 X 5100 MHz CPU s) -CPU Caches: - L1 Data 48 KiB (x12) - L1 Instruction 32 KiB (x12) - L2 Unified 1280 KiB (x12) - L3 Unified 30720 KiB (x1) -Load Average: 2.06, 4.46, 7.03 -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
--------------------------------------------------------------------------------------------- -Benchmark Time CPU Iterations --------------------------------------------------------------------------------------------- -Eigen_Convolve2D/1 11.9 ms 11.9 ms 60 -MLIR_Conv2D/1 29.9 ms 29.9 ms 24 -Buddy_Conv2D/1 1.01 ms 1.01 ms 738 -Buddy_Corr2D_Constant_Padding/1 1.78 ms 1.78 ms 393 -OpenCV_Filter2D_Constant_Padding/1 2.75 ms 2.75 ms 255 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.146 ms 0.146 ms 4756 -Buddy_Resize2D_Bilinear_Interpolation/1 0.266 ms 0.266 ms 2630 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 103027 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 48458 -Buddy_Erosion2D_Constant_Padding/1 0.222 ms 0.222 ms 3152 -Buddy_Dilation2D_Constant_Padding/1 0.222 ms 0.222 ms 3142 -Buddy_Opening2D_Constant_Padding/1 0.334 ms 0.334 ms 2069 -Buddy_Closing2D_Constant_Padding/1 0.332 ms 0.332 ms 2004 -Buddy_TopHat2D_Constant_Padding/1 0.916 ms 0.916 ms 753 -Buddy_BottomHat2D_Constant_Padding/1 0.871 ms 0.871 ms 762 -OpenCV_Erode2D_Constant_Padding/1 0.141 ms 0.141 ms 4961 -OpenCV_Opening2D_Constant_Padding/1 0.220 ms 0.220 ms 3152 -OpenCV_Closing2D_Constant_Padding/1 0.228 ms 0.228 ms 2470 -OpenCV_TopHat2D_Constant_Padding/1 0.257 ms 0.257 ms 2673 -OpenCV_BottomHat2D_Constant_Padding/1 0.258 ms 0.258 ms 2749 -OpenCV_MorphGrad2D_Constant_Padding/1 0.257 ms 0.257 ms 2717 -OpenCV_Dilate2D_Constant_Padding/1 0.138 ms 0.138 ms 5082 +[Success] … Testing AVX512 support CPU does not support AVX512. 
Testing NEON support diff --git a/test_result/vectorization/vectorization_matrix.json b/test_result/vectorization/vectorization_matrix.json old mode 100755 new mode 100644 index a25f391c..b8b5ca10 --- a/test_result/vectorization/vectorization_matrix.json +++ b/test_result/vectorization/vectorization_matrix.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-05-26T21:43:08+00:00", + "date": "2025-06-01T10:11:11+00:00", "host_name": "4ed4bacfe45d", "executable": "./vectorization-matrix-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [17.3032,18.9692,12.2134], + "load_avg": [1,1.08789,1.76172], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 35685429, - "real_time": 1.9466575990653187e+01, - "cpu_time": 1.9465874124702275e+01, + "iterations": 37302822, + "real_time": 1.8817115302162652e+01, + "cpu_time": 1.8816787319736829e+01, "time_unit": "ns" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 33101404, - "real_time": 2.1422448062752832e+01, - "cpu_time": 2.1422267405938427e+01, + "iterations": 35030976, + "real_time": 2.0450598917981729e+01, + "cpu_time": 2.0450241780303237e+01, "time_unit": "ns" } ] diff --git a/test_result/vectorization/vectorization_matrix.log b/test_result/vectorization/vectorization_matrix.log new file mode 100644 index 00000000..0fb91b1e --- /dev/null +++ b/test_result/vectorization/vectorization_matrix.log @@ -0,0 +1,21 @@ +2025-06-01T10:11:11+00:00 +Running ./vectorization-matrix-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 1.00, 1.09, 1.76 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
+-------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------- +MLIR_MatMul/1 18.8 ns 18.8 ns 37302822 +MLIR_MatVec/1 20.5 ns 20.5 ns 35030976 +-------------------------------------------------------- +MLIR_MatMul: MLIR MatMul Operation + Nested Loop +[ 18 18 18 18 18 18 18 18 18 18 ] +-------------------------------------------------------- +MLIR_MatVec: MLIR MatVec Operation +[ 18 18 18 18 18 18 18 18 18 18 ] diff --git a/test_result/vectorization/vectorization_result.log b/test_result/vectorization/vectorization_result.log old mode 100755 new mode 100644 index 76a41ad1..c1bb63ff --- a/test_result/vectorization/vectorization_result.log +++ b/test_result/vectorization/vectorization_result.log @@ -1,4 +1,4 @@ -Vectorization Benchmark - Mon May 26 21:43:02 UTC 2025 +Vectorization Benchmark - Sun Jun 1 10:11:04 UTC 2025 [Info] Starting vectorization-matrix-benchmark build... [Info] Running CMake configuration... 
-- The CXX compiler identification is GNU 11.4.0 @@ -128,23 +128,23 @@ Call Stack (most recent call first): -- Generating done -- Build files have been written to: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/src/project_googlebenchmark-build [10/17] Performing build step for 'project_googlebenchmark' -[1/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark_name.cc.o -[2/22] Building CXX object src/CMakeFiles/benchmark_main.dir/benchmark_main.cc.o -[3/22] Building CXX object src/CMakeFiles/benchmark.dir/sleep.cc.o -[4/22] Building CXX object src/CMakeFiles/benchmark.dir/perf_counters.cc.o -[5/22] Building CXX object src/CMakeFiles/benchmark.dir/reporter.cc.o -[6/22] Building CXX object src/CMakeFiles/benchmark.dir/colorprint.cc.o -[7/22] Building CXX object src/CMakeFiles/benchmark.dir/counter.cc.o -[8/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark_api_internal.cc.o -[9/22] Building CXX object src/CMakeFiles/benchmark.dir/string_util.cc.o -[10/22] Building CXX object src/CMakeFiles/benchmark.dir/timers.cc.o -[11/22] Building CXX object src/CMakeFiles/benchmark.dir/console_reporter.cc.o +[1/22] Building CXX object src/CMakeFiles/benchmark.dir/colorprint.cc.o +[2/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark_name.cc.o +[3/22] Building CXX object src/CMakeFiles/benchmark_main.dir/benchmark_main.cc.o +[4/22] Building CXX object src/CMakeFiles/benchmark.dir/sleep.cc.o +[5/22] Building CXX object src/CMakeFiles/benchmark.dir/timers.cc.o +[6/22] Building CXX object src/CMakeFiles/benchmark.dir/perf_counters.cc.o +[7/22] Building CXX object src/CMakeFiles/benchmark.dir/csv_reporter.cc.o +[8/22] Building CXX object src/CMakeFiles/benchmark.dir/counter.cc.o +[9/22] Building CXX object src/CMakeFiles/benchmark.dir/reporter.cc.o +[10/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark_api_internal.cc.o +[11/22] Building CXX object src/CMakeFiles/benchmark.dir/commandlineflags.cc.o [12/22] Building 
CXX object src/CMakeFiles/benchmark.dir/complexity.cc.o -[13/22] Building CXX object src/CMakeFiles/benchmark.dir/commandlineflags.cc.o -[14/22] Building CXX object src/CMakeFiles/benchmark.dir/csv_reporter.cc.o +[13/22] Building CXX object src/CMakeFiles/benchmark.dir/string_util.cc.o +[14/22] Building CXX object src/CMakeFiles/benchmark.dir/console_reporter.cc.o [15/22] Building CXX object src/CMakeFiles/benchmark.dir/json_reporter.cc.o -[16/22] Building CXX object src/CMakeFiles/benchmark.dir/statistics.cc.o -[17/22] Building CXX object src/CMakeFiles/benchmark.dir/sysinfo.cc.o +[16/22] Building CXX object src/CMakeFiles/benchmark.dir/sysinfo.cc.o +[17/22] Building CXX object src/CMakeFiles/benchmark.dir/statistics.cc.o [18/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark_runner.cc.o [19/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark.cc.o [20/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark_register.cc.o @@ -176,11 +176,11 @@ Call Stack (most recent call first): [12/17] No test step for 'project_googlebenchmark' [13/17] Completed 'project_googlebenchmark' [14/17] Building CXX object benchmarks/Vectorization/CMakeFiles/vectorization-matrix-benchmark.dir/Main.cpp.o -[15/17] Building CXX object benchmarks/Vectorization/CMakeFiles/vectorization-matrix-benchmark.dir/MLIRMatMulBenchmark.cpp.o -[16/17] Building CXX object benchmarks/Vectorization/CMakeFiles/vectorization-matrix-benchmark.dir/MLIRMatVecBenchmark.cpp.o +[15/17] Building CXX object benchmarks/Vectorization/CMakeFiles/vectorization-matrix-benchmark.dir/MLIRMatVecBenchmark.cpp.o +[16/17] Building CXX object benchmarks/Vectorization/CMakeFiles/vectorization-matrix-benchmark.dir/MLIRMatMulBenchmark.cpp.o [17/17] Linking CXX executable bin/vectorization-matrix-benchmark [Info] Running vectorization-matrix-benchmark... 
-2025-05-26T21:43:08+00:00 +2025-06-01T10:11:11+00:00 Running ./vectorization-matrix-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -188,13 +188,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 17.30, 18.97, 12.21 +Load Average: 1.00, 1.09, 1.76 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. -------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------- -MLIR_MatMul/1 19.5 ns 19.5 ns 35685429 -MLIR_MatVec/1 21.4 ns 21.4 ns 33101404 +MLIR_MatMul/1 18.8 ns 18.8 ns 37302822 +MLIR_MatVec/1 20.5 ns 20.5 ns 35030976 -------------------------------------------------------- MLIR_MatMul: MLIR MatMul Operation + Nested Loop [ 18 18 18 18 18 18 18 18 18 18 ] From 13002085449dc2e5563df96c85dee340c7a47f86 Mon Sep 17 00:00:00 2001 From: Q Liu <52538137+LIUQyou@users.noreply.github.com> Date: Sun, 1 Jun 2025 13:18:59 +0200 Subject: [PATCH 24/52] update the script. 
--- .github/workflows/bench.yml | 16 +++++++++++----- scripts/logs2html.py | 19 +++++++++++-------- site/deeplearning/dl-layer-ffn-benchmark.html | 2 +- .../dl-layer-rmsnorm-benchmark.html | 2 +- .../dl-layer-selfattention-benchmark.html | 2 +- .../dl-model-lenet-benchmark.html | 2 +- .../dl-model-mobilenetv3-benchmark.html | 2 +- .../dl-model-resnet18-benchmark.html | 2 +- .../dl-model-tinyllama-benchmark.html | 2 +- .../dl-model-whisper-benchmark.html | 2 +- .../dl-op-linalg-arithaddf-benchmark.html | 2 +- .../dl-op-linalg-arithdivf-benchmark.html | 2 +- .../dl-op-linalg-arithmulf-benchmark.html | 2 +- .../dl-op-linalg-arithnegf-benchmark.html | 2 +- .../dl-op-linalg-arithsubf-benchmark.html | 2 +- .../dl-op-linalg-batch-matmul-benchmark.html | 2 +- ...-op-linalg-conv2d-nchw-fchw-benchmark.html | 2 +- ...-op-linalg-conv2d-nhwc-fhwc-benchmark.html | 2 +- ...-op-linalg-conv2d-nhwc-hwcf-benchmark.html | 2 +- ...-depthwise-conv-2d-nhwc-hwc-benchmark.html | 2 +- .../dl-op-linalg-mathexp-benchmark.html | 2 +- .../dl-op-linalg-mathfpow-benchmark.html | 2 +- .../dl-op-linalg-mathrsqrt-benchmark.html | 2 +- .../dl-op-linalg-matmul-benchmark.html | 2 +- ...-op-linalg-pooling-nhwc-sum-benchmark.html | 2 +- .../dl-op-linalg-reduceaddf-benchmark.html | 2 +- .../dl-op-linalg-reducemaxf-benchmark.html | 2 +- ...-linalg-softmax-exp-sum-div-benchmark.html | 2 +- .../dl-op-matmul-transpose-b-benchmark.html | 2 +- .../dl-op-tosa-transpose-benchmark.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- 
...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- site/vectorization/vectorization_matrix.html | 2 +- 59 files changed, 79 insertions(+), 70 deletions(-) diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml index c8105b08..28f79e8e 100755 --- a/.github/workflows/bench.yml +++ b/.github/workflows/bench.yml @@ -56,9 +56,15 @@ jobs: with: path: /home/quliu/buddy-complier-workspace/buddy-benchmark/site - - name: Deploy to Pages - id: deploy - uses: actions/deploy-pages@v4 + - name: Push to buddy-compiler.github.io + uses: peaceiris/actions-gh-pages@v4 + if: github.event_name == 'push' && github.ref == 'refs/heads/main' + with: + personal_token: ${{ secrets.BUDDY_SITE_PAT }} + external_repository: buddy-compiler/buddy-compiler.github.io + publish_dir: /home/quliu/buddy-complier-workspace/buddy-benchmark/site + destination_dir: benchmarks/${{ github.sha }} + publish_branch: main + keep_files: true + commit_message: Deploy benchmark results for ${{ github.sha }} (from ${{ github.repository }}) - outputs: - page_url: ${{ steps.deploy.outputs.page_url }} diff --git 
a/scripts/logs2html.py b/scripts/logs2html.py index 90053c4c..8cf9f585 100755 --- a/scripts/logs2html.py +++ b/scripts/logs2html.py @@ -88,11 +88,14 @@ def gbench_json_to_table(js_path: pathlib.Path) -> str: # --------------------------------------------------------------------------- # rebuild index # --------------------------------------------------------------------------- -links = "\n".join( - f'
          • ' - f'{p.relative_to(dst).as_posix()}
          • ' - for p in sorted(dst.rglob("*.html")) - if p.name != "index.html" -) -(dst / "index.html").write_text( - CSS + "

            Buddy-Benchmark results

              \n" + links + "\n
            ") +run_dir = dst / "benchmarks" / datetime.date.today().isoformat() +run_dir.mkdir(parents=True, exist_ok=True) + +# --------------------------------------------------------------------------- +# rebuild index (leave the links = … block exactly as it is) +# --------------------------------------------------------------------------- + +# ⬇︎ replace the old line that wrote to “dst/index.html” +(run_dir / "index.html").write_text( + CSS + f"

            Buddy-Benchmark results

              \n{links}\n
            " +) \ No newline at end of file diff --git a/site/deeplearning/dl-layer-ffn-benchmark.html b/site/deeplearning/dl-layer-ffn-benchmark.html index 7dd67bee..4f62ab66 100644 --- a/site/deeplearning/dl-layer-ffn-benchmark.html +++ b/site/deeplearning/dl-layer-ffn-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-layer-ffn-benchmark.json

            2025-06-01 10:22:14 UTC

            +

            deeplearning/dl-layer-ffn-benchmark.json

            2025-06-01 10:27:55 UTC

            dl-layer-ffn-benchmark.json

            diff --git a/site/deeplearning/dl-layer-rmsnorm-benchmark.html b/site/deeplearning/dl-layer-rmsnorm-benchmark.html index 9a9c019c..5f346ea7 100644 --- a/site/deeplearning/dl-layer-rmsnorm-benchmark.html +++ b/site/deeplearning/dl-layer-rmsnorm-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-layer-rmsnorm-benchmark.json

            2025-06-01 10:22:14 UTC

            +

            deeplearning/dl-layer-rmsnorm-benchmark.json

            2025-06-01 10:27:55 UTC

            dl-layer-rmsnorm-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_LAYER_FFN/Scalar0.06530.065310,788
            diff --git a/site/deeplearning/dl-layer-selfattention-benchmark.html b/site/deeplearning/dl-layer-selfattention-benchmark.html index aef64b00..3880625f 100644 --- a/site/deeplearning/dl-layer-selfattention-benchmark.html +++ b/site/deeplearning/dl-layer-selfattention-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-layer-selfattention-benchmark.json

            2025-06-01 10:22:14 UTC

            +

            deeplearning/dl-layer-selfattention-benchmark.json

            2025-06-01 10:27:55 UTC

            dl-layer-selfattention-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_LAYER_RMSNORM/Scalar0.001960.00196355,522
            diff --git a/site/deeplearning/dl-model-lenet-benchmark.html b/site/deeplearning/dl-model-lenet-benchmark.html index ad337c18..2f5dc8bb 100644 --- a/site/deeplearning/dl-model-lenet-benchmark.html +++ b/site/deeplearning/dl-model-lenet-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-model-lenet-benchmark.json

            2025-06-01 10:22:14 UTC

            +

            deeplearning/dl-model-lenet-benchmark.json

            2025-06-01 10:27:55 UTC

            dl-model-lenet-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_LAYER_ATTENTION/Scalar4.694.69149
            diff --git a/site/deeplearning/dl-model-mobilenetv3-benchmark.html b/site/deeplearning/dl-model-mobilenetv3-benchmark.html index 34c6a586..39e390d4 100644 --- a/site/deeplearning/dl-model-mobilenetv3-benchmark.html +++ b/site/deeplearning/dl-model-mobilenetv3-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-model-mobilenetv3-benchmark.json

            2025-06-01 10:22:14 UTC

            +

            deeplearning/dl-model-mobilenetv3-benchmark.json

            2025-06-01 10:27:55 UTC

            dl-model-mobilenetv3-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_MODEL_LENET/Auto_Vectorization0.1610.1614,427
            diff --git a/site/deeplearning/dl-model-resnet18-benchmark.html b/site/deeplearning/dl-model-resnet18-benchmark.html index 150e7970..616274a1 100644 --- a/site/deeplearning/dl-model-resnet18-benchmark.html +++ b/site/deeplearning/dl-model-resnet18-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-model-resnet18-benchmark.json

            2025-06-01 10:22:14 UTC

            +

            deeplearning/dl-model-resnet18-benchmark.json

            2025-06-01 10:27:55 UTC

            dl-model-resnet18-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_MobileNet_V3/BM_MobileNet_V3_scalar35.935.918
            diff --git a/site/deeplearning/dl-model-tinyllama-benchmark.html b/site/deeplearning/dl-model-tinyllama-benchmark.html index f834f3b1..43346d6f 100644 --- a/site/deeplearning/dl-model-tinyllama-benchmark.html +++ b/site/deeplearning/dl-model-tinyllama-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-model-tinyllama-benchmark.json

            2025-06-01 10:22:14 UTC

            +

            deeplearning/dl-model-tinyllama-benchmark.json

            2025-06-01 10:27:55 UTC

            dl-model-tinyllama-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_MODEL_Resnet18/Auto_Vectorization7197181
            diff --git a/site/deeplearning/dl-model-whisper-benchmark.html b/site/deeplearning/dl-model-whisper-benchmark.html index 0f2ff8c8..feb61858 100644 --- a/site/deeplearning/dl-model-whisper-benchmark.html +++ b/site/deeplearning/dl-model-whisper-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-model-whisper-benchmark.json

            2025-06-01 10:22:14 UTC

            +

            deeplearning/dl-model-whisper-benchmark.json

            2025-06-01 10:27:55 UTC

            dl-model-whisper-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_MODEL_TINYLLAMA/scalar1.73e+051.73e+051
            diff --git a/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html b/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html index bd9433e9..a116e426 100644 --- a/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-arithaddf-benchmark.json

            2025-06-01 10:22:14 UTC

            +

            deeplearning/dl-op-linalg-arithaddf-benchmark.json

            2025-06-01 10:27:55 UTC

            dl-op-linalg-arithaddf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_MODEL_Whisper/Auto_Vectorization7.92e+047.92e+041
            diff --git a/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html b/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html index 570398f8..a7dd74a2 100644 --- a/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-arithdivf-benchmark.json

            2025-06-01 10:22:14 UTC

            +

            deeplearning/dl-op-linalg-arithdivf-benchmark.json

            2025-06-01 10:27:55 UTC

            dl-op-linalg-arithdivf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_ADDF_SCALAR0.02990.029923,357
            diff --git a/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html b/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html index e3f8afb0..3902b57d 100644 --- a/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-arithmulf-benchmark.json

            2025-06-01 10:22:14 UTC

            +

            deeplearning/dl-op-linalg-arithmulf-benchmark.json

            2025-06-01 10:27:55 UTC

            dl-op-linalg-arithmulf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_DIVF_SCALAR0.02930.029323,918
            diff --git a/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html b/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html index 0d6c82f8..753ca334 100644 --- a/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-arithnegf-benchmark.json

            2025-06-01 10:22:14 UTC

            +

            deeplearning/dl-op-linalg-arithnegf-benchmark.json

            2025-06-01 10:27:55 UTC

            dl-op-linalg-arithnegf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_MULF_SCALAR0.02960.029623,548
            diff --git a/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html b/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html index 731d0f82..49254629 100644 --- a/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-arithsubf-benchmark.json

            2025-06-01 10:22:14 UTC

            +

            deeplearning/dl-op-linalg-arithsubf-benchmark.json

            2025-06-01 10:27:55 UTC

            dl-op-linalg-arithsubf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_NEGF_SCALAR0.02280.022830,522
            diff --git a/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html b/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html index 4e069683..4b37cd46 100644 --- a/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html +++ b/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-batch-matmul-benchmark.json

            2025-06-01 10:22:14 UTC

            +

            deeplearning/dl-op-linalg-batch-matmul-benchmark.json

            2025-06-01 10:27:55 UTC

            dl-op-linalg-batch-matmul-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_SUBF_SCALAR0.02930.029323,583
            diff --git a/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html b/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html index ccc101b1..4d75de0e 100644 --- a/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html +++ b/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.json

            2025-06-01 10:22:14 UTC

            +

            deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.json

            2025-06-01 10:27:55 UTC

            dl-op-linalg-conv2d-nchw-fchw-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_BATCH_MATMUL/Scalar/iterations:13.53e+033.53e+031
            diff --git a/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html b/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html index c4f23e37..6674ac80 100644 --- a/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html +++ b/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json

            2025-06-01 10:22:14 UTC

            +

            deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json

            2025-06-01 10:27:55 UTC

            dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_Conv2DNchwFchw_SCALAR2832832
            diff --git a/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html b/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html index b1f4f3e2..fa621387 100644 --- a/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json

            2025-06-01 10:22:14 UTC

            +

            deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json

            2025-06-01 10:27:55 UTC

            dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_CONV_2D_NHWC_FHWC/scalar/iterations:572.572.55
            diff --git a/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html b/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html index 96e9e457..e2c1b157 100644 --- a/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html +++ b/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json

            2025-06-01 10:22:14 UTC

            +

            deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json

            2025-06-01 10:27:55 UTC

            dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_CONV_2D_NHWC_HWCF_SCALAR32.432.421
            diff --git a/site/deeplearning/dl-op-linalg-mathexp-benchmark.html b/site/deeplearning/dl-op-linalg-mathexp-benchmark.html index b322f413..bc5d6c0c 100644 --- a/site/deeplearning/dl-op-linalg-mathexp-benchmark.html +++ b/site/deeplearning/dl-op-linalg-mathexp-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-mathexp-benchmark.json

            2025-06-01 10:22:14 UTC

            +

            deeplearning/dl-op-linalg-mathexp-benchmark.json

            2025-06-01 10:27:55 UTC

            dl-op-linalg-mathexp-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/scalar/iterations:57.277.275
            diff --git a/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html b/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html index 7fb4b696..f5b67729 100644 --- a/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html +++ b/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-mathfpow-benchmark.json

            2025-06-01 10:22:14 UTC

            +

            deeplearning/dl-op-linalg-mathfpow-benchmark.json

            2025-06-01 10:27:55 UTC

            dl-op-linalg-mathfpow-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_EXP_SCALAR0.04560.045615,072
            diff --git a/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html b/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html index 2655a6f8..5657ff0c 100644 --- a/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html +++ b/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-mathrsqrt-benchmark.json

            2025-06-01 10:22:14 UTC

            +

            deeplearning/dl-op-linalg-mathrsqrt-benchmark.json

            2025-06-01 10:27:55 UTC

            dl-op-linalg-mathrsqrt-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_FPOW_SCALAR0.08520.08528,120
            diff --git a/site/deeplearning/dl-op-linalg-matmul-benchmark.html b/site/deeplearning/dl-op-linalg-matmul-benchmark.html index 35a9e0c9..1ed5596d 100644 --- a/site/deeplearning/dl-op-linalg-matmul-benchmark.html +++ b/site/deeplearning/dl-op-linalg-matmul-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-matmul-benchmark.json

            2025-06-01 10:22:14 UTC

            +

            deeplearning/dl-op-linalg-matmul-benchmark.json

            2025-06-01 10:27:55 UTC

            dl-op-linalg-matmul-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_RSQRT_SCALAR0.07280.07289,624
            diff --git a/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html b/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html index 3c92e8df..82c0efb4 100644 --- a/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html +++ b/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.json

            2025-06-01 10:22:14 UTC

            +

            deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.json

            2025-06-01 10:27:55 UTC

            dl-op-linalg-pooling-nhwc-sum-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_MATMUL/scalar_O0/iterations:14.1e+034.1e+031
            diff --git a/site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html b/site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html index 1b937e8f..f60df888 100644 --- a/site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-reduceaddf-benchmark.json

            2025-06-01 10:22:14 UTC

            +

            deeplearning/dl-op-linalg-reduceaddf-benchmark.json

            2025-06-01 10:27:55 UTC

            ⚠ FAILED: JSON parse error: Expecting value
            Console output
            2025-06-01T09:44:07+00:00
            diff --git a/site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html b/site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html
            index 24055ad2..50a12071 100644
            --- a/site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html
            +++ b/site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html
            @@ -10,7 +10,7 @@
             .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem}
             
             
            -

            deeplearning/dl-op-linalg-reducemaxf-benchmark.json

            2025-06-01 10:22:14 UTC

            +

            deeplearning/dl-op-linalg-reducemaxf-benchmark.json

            2025-06-01 10:27:55 UTC

            ⚠ FAILED: JSON parse error: Expecting value
            Console output
            2025-06-01T09:44:07+00:00
            diff --git a/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html b/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html
            index 398f1399..5dcf60ee 100644
            --- a/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html
            +++ b/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html
            @@ -10,7 +10,7 @@
             .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem}
             
             
            -

            deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.json

            2025-06-01 10:22:14 UTC

            +

            deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.json

            2025-06-01 10:27:55 UTC

            dl-op-linalg-softmax-exp-sum-div-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_POOLING_NHWC_SUM_SCALAR0.2330.2332,993
            diff --git a/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html b/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html index 950f277f..96bb6aef 100644 --- a/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html +++ b/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-matmul-transpose-b-benchmark.json

            2025-06-01 10:22:14 UTC

            +

            deeplearning/dl-op-matmul-transpose-b-benchmark.json

            2025-06-01 10:27:55 UTC

            dl-op-matmul-transpose-b-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_SOFTMAXEXPSUMDIV_SCALAR0.005590.00559123,186
            diff --git a/site/deeplearning/dl-op-tosa-transpose-benchmark.html b/site/deeplearning/dl-op-tosa-transpose-benchmark.html index 5dd32b66..11dc3f4d 100644 --- a/site/deeplearning/dl-op-tosa-transpose-benchmark.html +++ b/site/deeplearning/dl-op-tosa-transpose-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-tosa-transpose-benchmark.json

            2025-06-01 10:22:14 UTC

            +

            deeplearning/dl-op-tosa-transpose-benchmark.json

            2025-06-01 10:27:55 UTC

            dl-op-tosa-transpose-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_MATMUL_TRANSPOSE_B/scalar_O0/iterations:51.05e+031.04e+035
            diff --git a/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 61c39a76..3054db9c 100644 --- a/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 10:22:14 UTC

            +

            imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 10:27:55 UTC

            AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_TRANSPOSE_2D/scalar_O0/iterations:524.917.75
            diff --git a/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 097b55cc..a613016f 100644 --- a/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 10:22:14 UTC

            +

            imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 10:27:55 UTC

            AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.884.88143
            diff --git a/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 8b379ccc..0c801698 100644 --- a/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 10:22:14 UTC

            +

            imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 10:27:55 UTC

            AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.884.88143
            diff --git a/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 5b9a66e0..5719d906 100644 --- a/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 10:22:14 UTC

            +

            imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 10:27:55 UTC

            AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.511.561
            diff --git a/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 5db498f4..3fc38282 100644 --- a/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 10:22:14 UTC

            +

            imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 10:27:55 UTC

            AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.611.661
            diff --git a/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 7d89d00b..a1625e66 100644 --- a/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 10:22:14 UTC

            +

            imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 10:27:55 UTC

            AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.874.87144
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index b7a6d9a1..562cffd1 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 10:22:14 UTC

            +

            imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 10:27:55 UTC

            AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.94.9144
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 49aab0f9..8cdc0008 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 10:22:14 UTC

            +

            imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 10:27:55 UTC

            AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.774.77147
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 4f7ad6fd..cdf8d4f1 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 10:22:14 UTC

            +

            imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 10:27:55 UTC

            AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.914.91143
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 242d00ec..787d0693 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 10:22:14 UTC

            +

            imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 10:27:55 UTC

            AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.611.660
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index e3fc5ad2..95f63959 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 10:22:14 UTC

            +

            imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 10:27:55 UTC

            AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.511.560
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 6ed9986a..044081ab 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 10:22:14 UTC

            +

            imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 10:27:55 UTC

            AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/121.521.531
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 28cb3cc4..40c4d210 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 10:22:14 UTC

            +

            imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 10:27:55 UTC

            AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/121.621.632
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index b8fa1267..4b5af98b 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 10:22:14 UTC

            +

            imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 10:27:55 UTC

            AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/134.234.221
            diff --git a/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index d968de74..79645bb4 100644 --- a/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 10:22:14 UTC

            +

            imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 10:27:55 UTC

            SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/134.334.320
            diff --git a/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 1fd47c10..c9b45edb 100644 --- a/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 10:22:14 UTC

            +

            imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 10:27:55 UTC

            SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.744.74148
            diff --git a/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 1b283099..4a9e3061 100644 --- a/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 10:22:14 UTC

            +

            imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 10:27:55 UTC

            SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.884.88144
            diff --git a/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 12cc3d84..90759e30 100644 --- a/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 10:22:14 UTC

            +

            imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 10:27:55 UTC

            SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.511.560
            diff --git a/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index a1c47c85..59176be0 100644 --- a/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 10:22:14 UTC

            +

            imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 10:27:55 UTC

            SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.611.661
            diff --git a/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 06a3a1cc..14558eed 100644 --- a/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 10:22:14 UTC

            +

            imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 10:27:55 UTC

            SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/15.035.03139
            diff --git a/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index c1c11478..d83fdd20 100644 --- a/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 10:22:14 UTC

            +

            imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 10:27:55 UTC

            SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.894.89143
            diff --git a/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 3db0471f..e3495b73 100644 --- a/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 10:22:14 UTC

            +

            imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 10:27:55 UTC

            SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.894.89143
            diff --git a/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 68d8c188..5c992b30 100644 --- a/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 10:22:14 UTC

            +

            imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 10:27:55 UTC

            SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.874.87144
            diff --git a/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index c9393db2..321b7216 100644 --- a/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 10:22:14 UTC

            +

            imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 10:27:55 UTC

            SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.511.561
            diff --git a/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 3cb192cb..9a961aeb 100644 --- a/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 10:22:14 UTC

            +

            imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 10:27:55 UTC

            SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.611.661
            diff --git a/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index a88fc411..90d92b6a 100644 --- a/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 10:22:14 UTC

            +

            imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 10:27:55 UTC

            SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/121.621.631
            diff --git a/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index e0e22ffa..56e4345c 100644 --- a/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 10:22:14 UTC

            +

            imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 10:27:55 UTC

            SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/121.521.532
            diff --git a/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 5d73a832..95c56858 100644 --- a/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 10:22:14 UTC

            +

            imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 10:27:55 UTC

            SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/141.541.517
            diff --git a/site/vectorization/vectorization_matrix.html b/site/vectorization/vectorization_matrix.html index d875b3c4..862a2a83 100644 --- a/site/vectorization/vectorization_matrix.html +++ b/site/vectorization/vectorization_matrix.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            vectorization/vectorization_matrix.json

            2025-06-01 10:22:14 UTC

            +

            vectorization/vectorization_matrix.json

            2025-06-01 10:27:55 UTC

            vectorization_matrix.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/134.234.220
            From fabbc2703521c27f41fb4745b01c63b14f7b3376 Mon Sep 17 00:00:00 2001 From: Q Liu <52538137+LIUQyou@users.noreply.github.com> Date: Sun, 1 Jun 2025 13:31:10 +0200 Subject: [PATCH 25/52] update the script. --- scripts/logs2html.py | 14 +++- site/deeplearning/dl-layer-ffn-benchmark.html | 2 +- .../dl-layer-rmsnorm-benchmark.html | 2 +- .../dl-layer-selfattention-benchmark.html | 2 +- .../dl-model-lenet-benchmark.html | 2 +- .../dl-model-mobilenetv3-benchmark.html | 2 +- .../dl-model-resnet18-benchmark.html | 2 +- .../dl-model-tinyllama-benchmark.html | 2 +- .../dl-model-whisper-benchmark.html | 2 +- .../dl-op-linalg-arithaddf-benchmark.html | 2 +- .../dl-op-linalg-arithdivf-benchmark.html | 2 +- .../dl-op-linalg-arithmulf-benchmark.html | 2 +- .../dl-op-linalg-arithnegf-benchmark.html | 2 +- .../dl-op-linalg-arithsubf-benchmark.html | 2 +- .../dl-op-linalg-batch-matmul-benchmark.html | 2 +- ...-op-linalg-conv2d-nchw-fchw-benchmark.html | 2 +- ...-op-linalg-conv2d-nhwc-fhwc-benchmark.html | 2 +- ...-op-linalg-conv2d-nhwc-hwcf-benchmark.html | 2 +- ...-depthwise-conv-2d-nhwc-hwc-benchmark.html | 2 +- .../dl-op-linalg-mathexp-benchmark.html | 2 +- .../dl-op-linalg-mathfpow-benchmark.html | 2 +- .../dl-op-linalg-mathrsqrt-benchmark.html | 2 +- .../dl-op-linalg-matmul-benchmark.html | 2 +- ...-op-linalg-pooling-nhwc-sum-benchmark.html | 2 +- .../dl-op-linalg-reduceaddf-benchmark.html | 2 +- .../dl-op-linalg-reducemaxf-benchmark.html | 2 +- ...-linalg-softmax-exp-sum-div-benchmark.html | 2 +- .../dl-op-matmul-transpose-b-benchmark.html | 2 +- .../dl-op-tosa-transpose-benchmark.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- 
...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- site/index.html | 70 ------------------- site/vectorization/vectorization_matrix.html | 2 +- 59 files changed, 68 insertions(+), 130 deletions(-) delete mode 100644 site/index.html diff --git a/scripts/logs2html.py b/scripts/logs2html.py index 8cf9f585..b9fc79be 100755 --- a/scripts/logs2html.py +++ b/scripts/logs2html.py @@ -86,16 +86,24 @@ def gbench_json_to_table(js_path: pathlib.Path) -> str: page.write_text("\n".join(body)) # --------------------------------------------------------------------------- -# rebuild index +# ❶ choose a dated sub-folder for this run # --------------------------------------------------------------------------- run_dir = dst / "benchmarks" / datetime.date.today().isoformat() run_dir.mkdir(parents=True, exist_ok=True) # --------------------------------------------------------------------------- -# rebuild index (leave the links = … block 
exactly as it is) +# ❷ collect links to every HTML page we just generated # --------------------------------------------------------------------------- +links = "\n".join( + f'
          • ' + f'{p.relative_to(run_dir).as_posix()}
          • ' + for p in sorted(run_dir.rglob("*.html")) # inside today's folder + if p.name != "index.html" # skip the index itself +) -# ⬇︎ replace the old line that wrote to “dst/index.html” +# --------------------------------------------------------------------------- +# ❸ write (or overwrite) today’s index page +# --------------------------------------------------------------------------- (run_dir / "index.html").write_text( CSS + f"

            Buddy-Benchmark results

              \n{links}\n
            " ) \ No newline at end of file diff --git a/site/deeplearning/dl-layer-ffn-benchmark.html b/site/deeplearning/dl-layer-ffn-benchmark.html index 4f62ab66..d32ac37f 100644 --- a/site/deeplearning/dl-layer-ffn-benchmark.html +++ b/site/deeplearning/dl-layer-ffn-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-layer-ffn-benchmark.json

            2025-06-01 10:27:55 UTC

            +

            deeplearning/dl-layer-ffn-benchmark.json

            2025-06-01 11:25:55 UTC

            dl-layer-ffn-benchmark.json

            NameTime (ns)CPU (ns)Iterations
            MLIR_MatMul/118.818.837,302,822
            diff --git a/site/deeplearning/dl-layer-rmsnorm-benchmark.html b/site/deeplearning/dl-layer-rmsnorm-benchmark.html index 5f346ea7..c560545c 100644 --- a/site/deeplearning/dl-layer-rmsnorm-benchmark.html +++ b/site/deeplearning/dl-layer-rmsnorm-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-layer-rmsnorm-benchmark.json

            2025-06-01 10:27:55 UTC

            +

            deeplearning/dl-layer-rmsnorm-benchmark.json

            2025-06-01 11:25:55 UTC

            dl-layer-rmsnorm-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_LAYER_FFN/Scalar0.06530.065310,788
            diff --git a/site/deeplearning/dl-layer-selfattention-benchmark.html b/site/deeplearning/dl-layer-selfattention-benchmark.html index 3880625f..3f1b8846 100644 --- a/site/deeplearning/dl-layer-selfattention-benchmark.html +++ b/site/deeplearning/dl-layer-selfattention-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-layer-selfattention-benchmark.json

            2025-06-01 10:27:55 UTC

            +

            deeplearning/dl-layer-selfattention-benchmark.json

            2025-06-01 11:25:55 UTC

            dl-layer-selfattention-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_LAYER_RMSNORM/Scalar0.001960.00196355,522
            diff --git a/site/deeplearning/dl-model-lenet-benchmark.html b/site/deeplearning/dl-model-lenet-benchmark.html index 2f5dc8bb..bb215fee 100644 --- a/site/deeplearning/dl-model-lenet-benchmark.html +++ b/site/deeplearning/dl-model-lenet-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-model-lenet-benchmark.json

            2025-06-01 10:27:55 UTC

            +

            deeplearning/dl-model-lenet-benchmark.json

            2025-06-01 11:25:55 UTC

            dl-model-lenet-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_LAYER_ATTENTION/Scalar4.694.69149
            diff --git a/site/deeplearning/dl-model-mobilenetv3-benchmark.html b/site/deeplearning/dl-model-mobilenetv3-benchmark.html index 39e390d4..c03f91cd 100644 --- a/site/deeplearning/dl-model-mobilenetv3-benchmark.html +++ b/site/deeplearning/dl-model-mobilenetv3-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-model-mobilenetv3-benchmark.json

            2025-06-01 10:27:55 UTC

            +

            deeplearning/dl-model-mobilenetv3-benchmark.json

            2025-06-01 11:25:55 UTC

            dl-model-mobilenetv3-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_MODEL_LENET/Auto_Vectorization0.1610.1614,427
            diff --git a/site/deeplearning/dl-model-resnet18-benchmark.html b/site/deeplearning/dl-model-resnet18-benchmark.html index 616274a1..166b04dc 100644 --- a/site/deeplearning/dl-model-resnet18-benchmark.html +++ b/site/deeplearning/dl-model-resnet18-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-model-resnet18-benchmark.json

            2025-06-01 10:27:55 UTC

            +

            deeplearning/dl-model-resnet18-benchmark.json

            2025-06-01 11:25:55 UTC

            dl-model-resnet18-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_MobileNet_V3/BM_MobileNet_V3_scalar35.935.918
            diff --git a/site/deeplearning/dl-model-tinyllama-benchmark.html b/site/deeplearning/dl-model-tinyllama-benchmark.html index 43346d6f..602f666d 100644 --- a/site/deeplearning/dl-model-tinyllama-benchmark.html +++ b/site/deeplearning/dl-model-tinyllama-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-model-tinyllama-benchmark.json

            2025-06-01 10:27:55 UTC

            +

            deeplearning/dl-model-tinyllama-benchmark.json

            2025-06-01 11:25:55 UTC

            dl-model-tinyllama-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_MODEL_Resnet18/Auto_Vectorization7197181
            diff --git a/site/deeplearning/dl-model-whisper-benchmark.html b/site/deeplearning/dl-model-whisper-benchmark.html index feb61858..2792d3f1 100644 --- a/site/deeplearning/dl-model-whisper-benchmark.html +++ b/site/deeplearning/dl-model-whisper-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-model-whisper-benchmark.json

            2025-06-01 10:27:55 UTC

            +

            deeplearning/dl-model-whisper-benchmark.json

            2025-06-01 11:25:55 UTC

            dl-model-whisper-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_MODEL_TINYLLAMA/scalar1.73e+051.73e+051
            diff --git a/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html b/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html index a116e426..1f417e1b 100644 --- a/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-arithaddf-benchmark.json

            2025-06-01 10:27:55 UTC

            +

            deeplearning/dl-op-linalg-arithaddf-benchmark.json

            2025-06-01 11:25:55 UTC

            dl-op-linalg-arithaddf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_MODEL_Whisper/Auto_Vectorization7.92e+047.92e+041
            diff --git a/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html b/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html index a7dd74a2..3535f566 100644 --- a/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-arithdivf-benchmark.json

            2025-06-01 10:27:55 UTC

            +

            deeplearning/dl-op-linalg-arithdivf-benchmark.json

            2025-06-01 11:25:55 UTC

            dl-op-linalg-arithdivf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_ADDF_SCALAR0.02990.029923,357
            diff --git a/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html b/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html index 3902b57d..cd46cb73 100644 --- a/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-arithmulf-benchmark.json

            2025-06-01 10:27:55 UTC

            +

            deeplearning/dl-op-linalg-arithmulf-benchmark.json

            2025-06-01 11:25:55 UTC

            dl-op-linalg-arithmulf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_DIVF_SCALAR0.02930.029323,918
            diff --git a/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html b/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html index 753ca334..8dd3f839 100644 --- a/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-arithnegf-benchmark.json

            2025-06-01 10:27:55 UTC

            +

            deeplearning/dl-op-linalg-arithnegf-benchmark.json

            2025-06-01 11:25:55 UTC

            dl-op-linalg-arithnegf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_MULF_SCALAR0.02960.029623,548
            diff --git a/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html b/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html index 49254629..521fd993 100644 --- a/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-arithsubf-benchmark.json

            2025-06-01 10:27:55 UTC

            +

            deeplearning/dl-op-linalg-arithsubf-benchmark.json

            2025-06-01 11:25:55 UTC

            dl-op-linalg-arithsubf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_NEGF_SCALAR0.02280.022830,522
            diff --git a/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html b/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html index 4b37cd46..4c341360 100644 --- a/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html +++ b/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-batch-matmul-benchmark.json

            2025-06-01 10:27:55 UTC

            +

            deeplearning/dl-op-linalg-batch-matmul-benchmark.json

            2025-06-01 11:25:55 UTC

            dl-op-linalg-batch-matmul-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_SUBF_SCALAR0.02930.029323,583
            diff --git a/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html b/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html index 4d75de0e..945b169d 100644 --- a/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html +++ b/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.json

            2025-06-01 10:27:55 UTC

            +

            deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.json

            2025-06-01 11:25:55 UTC

            dl-op-linalg-conv2d-nchw-fchw-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_BATCH_MATMUL/Scalar/iterations:13.53e+033.53e+031
            diff --git a/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html b/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html index 6674ac80..4e882aec 100644 --- a/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html +++ b/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json

            2025-06-01 10:27:55 UTC

            +

            deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json

            2025-06-01 11:25:55 UTC

            dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_Conv2DNchwFchw_SCALAR2832832
            diff --git a/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html b/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html index fa621387..3dc391ab 100644 --- a/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json

            2025-06-01 10:27:55 UTC

            +

            deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json

            2025-06-01 11:25:55 UTC

            dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_CONV_2D_NHWC_FHWC/scalar/iterations:572.572.55
            diff --git a/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html b/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html index e2c1b157..c4a84a94 100644 --- a/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html +++ b/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json

            2025-06-01 10:27:55 UTC

            +

            deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json

            2025-06-01 11:25:55 UTC

            dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_CONV_2D_NHWC_HWCF_SCALAR32.432.421
            diff --git a/site/deeplearning/dl-op-linalg-mathexp-benchmark.html b/site/deeplearning/dl-op-linalg-mathexp-benchmark.html index bc5d6c0c..05136028 100644 --- a/site/deeplearning/dl-op-linalg-mathexp-benchmark.html +++ b/site/deeplearning/dl-op-linalg-mathexp-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-mathexp-benchmark.json

            2025-06-01 10:27:55 UTC

            +

            deeplearning/dl-op-linalg-mathexp-benchmark.json

            2025-06-01 11:25:55 UTC

            dl-op-linalg-mathexp-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/scalar/iterations:57.277.275
            diff --git a/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html b/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html index f5b67729..05a3763b 100644 --- a/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html +++ b/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-mathfpow-benchmark.json

            2025-06-01 10:27:55 UTC

            +

            deeplearning/dl-op-linalg-mathfpow-benchmark.json

            2025-06-01 11:25:55 UTC

            dl-op-linalg-mathfpow-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_EXP_SCALAR0.04560.045615,072
            diff --git a/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html b/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html index 5657ff0c..a224dfc9 100644 --- a/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html +++ b/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-mathrsqrt-benchmark.json

            2025-06-01 10:27:55 UTC

            +

            deeplearning/dl-op-linalg-mathrsqrt-benchmark.json

            2025-06-01 11:25:55 UTC

            dl-op-linalg-mathrsqrt-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_FPOW_SCALAR0.08520.08528,120
            diff --git a/site/deeplearning/dl-op-linalg-matmul-benchmark.html b/site/deeplearning/dl-op-linalg-matmul-benchmark.html index 1ed5596d..704cdb85 100644 --- a/site/deeplearning/dl-op-linalg-matmul-benchmark.html +++ b/site/deeplearning/dl-op-linalg-matmul-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-matmul-benchmark.json

            2025-06-01 10:27:55 UTC

            +

            deeplearning/dl-op-linalg-matmul-benchmark.json

            2025-06-01 11:25:55 UTC

            dl-op-linalg-matmul-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_RSQRT_SCALAR0.07280.07289,624
            diff --git a/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html b/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html index 82c0efb4..e8210027 100644 --- a/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html +++ b/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.json

            2025-06-01 10:27:55 UTC

            +

            deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.json

            2025-06-01 11:25:55 UTC

            dl-op-linalg-pooling-nhwc-sum-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_MATMUL/scalar_O0/iterations:14.1e+034.1e+031
            diff --git a/site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html b/site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html index f60df888..7365cb77 100644 --- a/site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-reduceaddf-benchmark.json

            2025-06-01 10:27:55 UTC

            +

            deeplearning/dl-op-linalg-reduceaddf-benchmark.json

            2025-06-01 11:25:55 UTC

            ⚠ FAILED: JSON parse error: Expecting value
            Console output
            2025-06-01T09:44:07+00:00
            diff --git a/site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html b/site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html
            index 50a12071..2e24b371 100644
            --- a/site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html
            +++ b/site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html
            @@ -10,7 +10,7 @@
             .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem}
             
             
            -

            deeplearning/dl-op-linalg-reducemaxf-benchmark.json

            2025-06-01 10:27:55 UTC

            +

            deeplearning/dl-op-linalg-reducemaxf-benchmark.json

            2025-06-01 11:25:55 UTC

            ⚠ FAILED: JSON parse error: Expecting value
            Console output
            2025-06-01T09:44:07+00:00
            diff --git a/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html b/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html
            index 5dcf60ee..542e06a1 100644
            --- a/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html
            +++ b/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html
            @@ -10,7 +10,7 @@
             .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem}
             
             
            -

            deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.json

            2025-06-01 10:27:55 UTC

            +

            deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.json

            2025-06-01 11:25:55 UTC

            dl-op-linalg-softmax-exp-sum-div-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_POOLING_NHWC_SUM_SCALAR0.2330.2332,993
            diff --git a/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html b/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html index 96bb6aef..cff69bff 100644 --- a/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html +++ b/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-matmul-transpose-b-benchmark.json

            2025-06-01 10:27:55 UTC

            +

            deeplearning/dl-op-matmul-transpose-b-benchmark.json

            2025-06-01 11:25:55 UTC

            dl-op-matmul-transpose-b-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_SOFTMAXEXPSUMDIV_SCALAR0.005590.00559123,186
            diff --git a/site/deeplearning/dl-op-tosa-transpose-benchmark.html b/site/deeplearning/dl-op-tosa-transpose-benchmark.html index 11dc3f4d..bd680496 100644 --- a/site/deeplearning/dl-op-tosa-transpose-benchmark.html +++ b/site/deeplearning/dl-op-tosa-transpose-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-tosa-transpose-benchmark.json

            2025-06-01 10:27:55 UTC

            +

            deeplearning/dl-op-tosa-transpose-benchmark.json

            2025-06-01 11:25:55 UTC

            dl-op-tosa-transpose-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_MATMUL_TRANSPOSE_B/scalar_O0/iterations:51.05e+031.04e+035
            diff --git a/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 3054db9c..9e11fb3c 100644 --- a/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 10:27:55 UTC

            +

            imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:25:55 UTC

            AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_TRANSPOSE_2D/scalar_O0/iterations:524.917.75
            diff --git a/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index a613016f..b95fcba1 100644 --- a/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 10:27:55 UTC

            +

            imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:25:55 UTC

            AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.884.88143
            diff --git a/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 0c801698..690e30a5 100644 --- a/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 10:27:55 UTC

            +

            imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:25:55 UTC

            AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.884.88143
            diff --git a/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 5719d906..87e89217 100644 --- a/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 10:27:55 UTC

            +

            imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:25:55 UTC

            AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.511.561
            diff --git a/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 3fc38282..c2cca69b 100644 --- a/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 10:27:55 UTC

            +

            imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:25:55 UTC

            AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.611.661
            diff --git a/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index a1625e66..5f348f7c 100644 --- a/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 10:27:55 UTC

            +

            imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:25:55 UTC

            AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.874.87144
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 562cffd1..44c601be 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 10:27:55 UTC

            +

            imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:25:55 UTC

            AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.94.9144
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 8cdc0008..e29c4bd4 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 10:27:55 UTC

            +

            imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:25:55 UTC

            AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.774.77147
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index cdf8d4f1..a34ad429 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 10:27:55 UTC

            +

            imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:25:55 UTC

            AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.914.91143
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 787d0693..a3e66687 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 10:27:55 UTC

            +

            imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:25:55 UTC

            AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.611.660
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 95f63959..e2e3bfe3 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 10:27:55 UTC

            +

            imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:25:55 UTC

            AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.511.560
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 044081ab..bf09a529 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 10:27:55 UTC

            +

            imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:25:55 UTC

            AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/121.521.531
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 40c4d210..8948200d 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 10:27:55 UTC

            +

            imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:25:55 UTC

            AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/121.621.632
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 4b5af98b..c6fe9af7 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 10:27:55 UTC

            +

            imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:25:55 UTC

            AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/134.234.221
            diff --git a/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 79645bb4..cd223503 100644 --- a/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 10:27:55 UTC

            +

            imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:25:55 UTC

            SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/134.334.320
            diff --git a/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index c9b45edb..8b7ee241 100644 --- a/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 10:27:55 UTC

            +

            imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:25:55 UTC

            SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.744.74148
            diff --git a/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 4a9e3061..d2cf13c5 100644 --- a/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 10:27:55 UTC

            +

            imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:25:55 UTC

            SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.884.88144
            diff --git a/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 90759e30..c0d8c72d 100644 --- a/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 10:27:55 UTC

            +

            imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:25:55 UTC

            SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.511.560
            diff --git a/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 59176be0..c032dc5f 100644 --- a/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 10:27:55 UTC

            +

            imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:25:55 UTC

            SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.611.661
            diff --git a/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 14558eed..0b3cd4d7 100644 --- a/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 10:27:55 UTC

            +

            imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:25:55 UTC

            SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/15.035.03139
            diff --git a/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index d83fdd20..57b11354 100644 --- a/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 10:27:55 UTC

            +

            imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:25:55 UTC

            SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.894.89143
            diff --git a/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index e3495b73..ac2937d1 100644 --- a/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 10:27:55 UTC

            +

            imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:25:55 UTC

            SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.894.89143
            diff --git a/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 5c992b30..5b3d2342 100644 --- a/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 10:27:55 UTC

            +

            imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:25:55 UTC

            SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.874.87144
            diff --git a/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 321b7216..d7feb33b 100644 --- a/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 10:27:55 UTC

            +

            imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:25:55 UTC

            SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.511.561
            diff --git a/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 9a961aeb..8ccec683 100644 --- a/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 10:27:55 UTC

            +

            imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:25:55 UTC

            SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.611.661
            diff --git a/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 90d92b6a..387dfb35 100644 --- a/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 10:27:55 UTC

            +

            imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:25:55 UTC

            SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/121.621.631
            diff --git a/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 56e4345c..06a78bc4 100644 --- a/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 10:27:55 UTC

            +

            imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:25:55 UTC

            SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/121.521.532
            diff --git a/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 95c56858..9a69da86 100644 --- a/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 10:27:55 UTC

            +

            imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:25:55 UTC

            SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/141.541.517
            diff --git a/site/index.html b/site/index.html deleted file mode 100644 index 49cd152f..00000000 --- a/site/index.html +++ /dev/null @@ -1,70 +0,0 @@ - - -

            Buddy-Benchmark results

            \ No newline at end of file diff --git a/site/vectorization/vectorization_matrix.html b/site/vectorization/vectorization_matrix.html index 862a2a83..bcce3b1d 100644 --- a/site/vectorization/vectorization_matrix.html +++ b/site/vectorization/vectorization_matrix.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            vectorization/vectorization_matrix.json

            2025-06-01 10:27:55 UTC

            +

            vectorization/vectorization_matrix.json

            2025-06-01 11:25:55 UTC

            vectorization_matrix.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/134.234.220
            From 7d0b5b0709578ecea99b9985f59e2963ec418604 Mon Sep 17 00:00:00 2001 From: Q Liu <52538137+LIUQyou@users.noreply.github.com> Date: Sun, 1 Jun 2025 13:43:04 +0200 Subject: [PATCH 26/52] update the script. --- .github/workflows/bench.yml | 17 +++++++++++++++++ site/benchmarks/2025-06-01/index.html | 14 ++++++++++++++ site/deeplearning/dl-layer-ffn-benchmark.html | 2 +- .../dl-layer-rmsnorm-benchmark.html | 2 +- .../dl-layer-selfattention-benchmark.html | 2 +- site/deeplearning/dl-model-lenet-benchmark.html | 2 +- .../dl-model-mobilenetv3-benchmark.html | 2 +- .../dl-model-resnet18-benchmark.html | 2 +- .../dl-model-tinyllama-benchmark.html | 2 +- .../dl-model-whisper-benchmark.html | 2 +- .../dl-op-linalg-arithaddf-benchmark.html | 2 +- .../dl-op-linalg-arithdivf-benchmark.html | 2 +- .../dl-op-linalg-arithmulf-benchmark.html | 2 +- .../dl-op-linalg-arithnegf-benchmark.html | 2 +- .../dl-op-linalg-arithsubf-benchmark.html | 2 +- .../dl-op-linalg-batch-matmul-benchmark.html | 2 +- ...dl-op-linalg-conv2d-nchw-fchw-benchmark.html | 2 +- ...dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html | 2 +- ...dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html | 2 +- ...lg-depthwise-conv-2d-nhwc-hwc-benchmark.html | 2 +- .../dl-op-linalg-mathexp-benchmark.html | 2 +- .../dl-op-linalg-mathfpow-benchmark.html | 2 +- .../dl-op-linalg-mathrsqrt-benchmark.html | 2 +- .../dl-op-linalg-matmul-benchmark.html | 2 +- ...dl-op-linalg-pooling-nhwc-sum-benchmark.html | 2 +- .../dl-op-linalg-reduceaddf-benchmark.html | 2 +- .../dl-op-linalg-reducemaxf-benchmark.html | 2 +- ...op-linalg-softmax-exp-sum-div-benchmark.html | 2 +- .../dl-op-matmul-transpose-b-benchmark.html | 2 +- .../dl-op-tosa-transpose-benchmark.html | 2 +- ...andom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...ndom3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...andom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...ndom3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...andom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- 
...ndom3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...andom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...ndom3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...andom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...ndom3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...andom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...ndom3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...andom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...ndom3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...andom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...ndom3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...andom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...ndom3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...andom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...ndom3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...andom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...ndom3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...andom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...ndom3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...andom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...ndom3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...andom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...ndom3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- site/vectorization/vectorization_matrix.html | 2 +- 59 files changed, 88 insertions(+), 57 deletions(-) create mode 100644 site/benchmarks/2025-06-01/index.html diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml index 28f79e8e..5c40e563 100755 --- a/.github/workflows/bench.yml +++ b/.github/workflows/bench.yml @@ -51,6 +51,23 @@ jobs: rm -rf site python3 scripts/logs2html.py test_result site + - name: Update benchmarks/latest redirect + working-directory: /home/quliu/buddy-complier-workspace/buddy-benchmark/site/benchmarks + run: | + set -e + # -------- pick the most recent dated folder (YYYY-MM-DD) ------------- + latest=$(ls -1d 20*/ | sort -r | head -n1 | tr -d '/') + echo "[Info] newest run is: 
$latest" + + # -------- rebuild the 'latest' folder with a meta-refresh ------------ + rm -rf latest + mkdir -p latest + cat > latest/index.html < + EOF + echo "[Info] benchmarks/latest now points to ../${latest}/" + + - name: Upload site artifact uses: actions/upload-pages-artifact@v3 with: diff --git a/site/benchmarks/2025-06-01/index.html b/site/benchmarks/2025-06-01/index.html new file mode 100644 index 00000000..1641d47e --- /dev/null +++ b/site/benchmarks/2025-06-01/index.html @@ -0,0 +1,14 @@ + + +

            Buddy-Benchmark results

              + +
            \ No newline at end of file diff --git a/site/deeplearning/dl-layer-ffn-benchmark.html b/site/deeplearning/dl-layer-ffn-benchmark.html index d32ac37f..58795aff 100644 --- a/site/deeplearning/dl-layer-ffn-benchmark.html +++ b/site/deeplearning/dl-layer-ffn-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-layer-ffn-benchmark.json

            2025-06-01 11:25:55 UTC

            +

            deeplearning/dl-layer-ffn-benchmark.json

            2025-06-01 11:31:25 UTC

            dl-layer-ffn-benchmark.json

            NameTime (ns)CPU (ns)Iterations
            MLIR_MatMul/118.818.837,302,822
            diff --git a/site/deeplearning/dl-layer-rmsnorm-benchmark.html b/site/deeplearning/dl-layer-rmsnorm-benchmark.html index c560545c..aa4775e9 100644 --- a/site/deeplearning/dl-layer-rmsnorm-benchmark.html +++ b/site/deeplearning/dl-layer-rmsnorm-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-layer-rmsnorm-benchmark.json

            2025-06-01 11:25:55 UTC

            +

            deeplearning/dl-layer-rmsnorm-benchmark.json

            2025-06-01 11:31:25 UTC

            dl-layer-rmsnorm-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_LAYER_FFN/Scalar0.06530.065310,788
            diff --git a/site/deeplearning/dl-layer-selfattention-benchmark.html b/site/deeplearning/dl-layer-selfattention-benchmark.html index 3f1b8846..2bde1442 100644 --- a/site/deeplearning/dl-layer-selfattention-benchmark.html +++ b/site/deeplearning/dl-layer-selfattention-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-layer-selfattention-benchmark.json

            2025-06-01 11:25:55 UTC

            +

            deeplearning/dl-layer-selfattention-benchmark.json

            2025-06-01 11:31:25 UTC

            dl-layer-selfattention-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_LAYER_RMSNORM/Scalar0.001960.00196355,522
            diff --git a/site/deeplearning/dl-model-lenet-benchmark.html b/site/deeplearning/dl-model-lenet-benchmark.html index bb215fee..9ece5921 100644 --- a/site/deeplearning/dl-model-lenet-benchmark.html +++ b/site/deeplearning/dl-model-lenet-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-model-lenet-benchmark.json

            2025-06-01 11:25:55 UTC

            +

            deeplearning/dl-model-lenet-benchmark.json

            2025-06-01 11:31:25 UTC

            dl-model-lenet-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_LAYER_ATTENTION/Scalar4.694.69149
            diff --git a/site/deeplearning/dl-model-mobilenetv3-benchmark.html b/site/deeplearning/dl-model-mobilenetv3-benchmark.html index c03f91cd..2e9aeb1e 100644 --- a/site/deeplearning/dl-model-mobilenetv3-benchmark.html +++ b/site/deeplearning/dl-model-mobilenetv3-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-model-mobilenetv3-benchmark.json

            2025-06-01 11:25:55 UTC

            +

            deeplearning/dl-model-mobilenetv3-benchmark.json

            2025-06-01 11:31:25 UTC

            dl-model-mobilenetv3-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_MODEL_LENET/Auto_Vectorization0.1610.1614,427
            diff --git a/site/deeplearning/dl-model-resnet18-benchmark.html b/site/deeplearning/dl-model-resnet18-benchmark.html index 166b04dc..b0fdd15c 100644 --- a/site/deeplearning/dl-model-resnet18-benchmark.html +++ b/site/deeplearning/dl-model-resnet18-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-model-resnet18-benchmark.json

            2025-06-01 11:25:55 UTC

            +

            deeplearning/dl-model-resnet18-benchmark.json

            2025-06-01 11:31:25 UTC

            dl-model-resnet18-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_MobileNet_V3/BM_MobileNet_V3_scalar35.935.918
            diff --git a/site/deeplearning/dl-model-tinyllama-benchmark.html b/site/deeplearning/dl-model-tinyllama-benchmark.html index 602f666d..b066bcf7 100644 --- a/site/deeplearning/dl-model-tinyllama-benchmark.html +++ b/site/deeplearning/dl-model-tinyllama-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-model-tinyllama-benchmark.json

            2025-06-01 11:25:55 UTC

            +

            deeplearning/dl-model-tinyllama-benchmark.json

            2025-06-01 11:31:25 UTC

            dl-model-tinyllama-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_MODEL_Resnet18/Auto_Vectorization7197181
            diff --git a/site/deeplearning/dl-model-whisper-benchmark.html b/site/deeplearning/dl-model-whisper-benchmark.html index 2792d3f1..79e8111e 100644 --- a/site/deeplearning/dl-model-whisper-benchmark.html +++ b/site/deeplearning/dl-model-whisper-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-model-whisper-benchmark.json

            2025-06-01 11:25:55 UTC

            +

            deeplearning/dl-model-whisper-benchmark.json

            2025-06-01 11:31:25 UTC

            dl-model-whisper-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_MODEL_TINYLLAMA/scalar1.73e+051.73e+051
            diff --git a/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html b/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html index 1f417e1b..539e6ce2 100644 --- a/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-arithaddf-benchmark.json

            2025-06-01 11:25:55 UTC

            +

            deeplearning/dl-op-linalg-arithaddf-benchmark.json

            2025-06-01 11:31:25 UTC

            dl-op-linalg-arithaddf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_MODEL_Whisper/Auto_Vectorization7.92e+047.92e+041
            diff --git a/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html b/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html index 3535f566..aef5e48c 100644 --- a/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-arithdivf-benchmark.json

            2025-06-01 11:25:55 UTC

            +

            deeplearning/dl-op-linalg-arithdivf-benchmark.json

            2025-06-01 11:31:25 UTC

            dl-op-linalg-arithdivf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_ADDF_SCALAR0.02990.029923,357
            diff --git a/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html b/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html index cd46cb73..8fcc466e 100644 --- a/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-arithmulf-benchmark.json

            2025-06-01 11:25:55 UTC

            +

            deeplearning/dl-op-linalg-arithmulf-benchmark.json

            2025-06-01 11:31:25 UTC

            dl-op-linalg-arithmulf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_DIVF_SCALAR0.02930.029323,918
            diff --git a/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html b/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html index 8dd3f839..53a1e59f 100644 --- a/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-arithnegf-benchmark.json

            2025-06-01 11:25:55 UTC

            +

            deeplearning/dl-op-linalg-arithnegf-benchmark.json

            2025-06-01 11:31:25 UTC

            dl-op-linalg-arithnegf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_MULF_SCALAR0.02960.029623,548
            diff --git a/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html b/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html index 521fd993..e8bcdd82 100644 --- a/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-arithsubf-benchmark.json

            2025-06-01 11:25:55 UTC

            +

            deeplearning/dl-op-linalg-arithsubf-benchmark.json

            2025-06-01 11:31:25 UTC

            dl-op-linalg-arithsubf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_NEGF_SCALAR0.02280.022830,522
            diff --git a/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html b/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html index 4c341360..18a3e4cd 100644 --- a/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html +++ b/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-batch-matmul-benchmark.json

            2025-06-01 11:25:55 UTC

            +

            deeplearning/dl-op-linalg-batch-matmul-benchmark.json

            2025-06-01 11:31:25 UTC

            dl-op-linalg-batch-matmul-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_SUBF_SCALAR0.02930.029323,583
            diff --git a/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html b/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html index 945b169d..2291ccea 100644 --- a/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html +++ b/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.json

            2025-06-01 11:25:55 UTC

            +

            deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.json

            2025-06-01 11:31:25 UTC

            dl-op-linalg-conv2d-nchw-fchw-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_BATCH_MATMUL/Scalar/iterations:13.53e+033.53e+031
            diff --git a/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html b/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html index 4e882aec..24b6c6d7 100644 --- a/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html +++ b/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json

            2025-06-01 11:25:55 UTC

            +

            deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json

            2025-06-01 11:31:25 UTC

            dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_Conv2DNchwFchw_SCALAR2832832
            diff --git a/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html b/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html index 3dc391ab..5d068cb3 100644 --- a/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json

            2025-06-01 11:25:55 UTC

            +

            deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json

            2025-06-01 11:31:25 UTC

            dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_CONV_2D_NHWC_FHWC/scalar/iterations:572.572.55
            diff --git a/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html b/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html index c4a84a94..136c0361 100644 --- a/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html +++ b/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json

            2025-06-01 11:25:55 UTC

            +

            deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json

            2025-06-01 11:31:25 UTC

            dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_CONV_2D_NHWC_HWCF_SCALAR32.432.421
            diff --git a/site/deeplearning/dl-op-linalg-mathexp-benchmark.html b/site/deeplearning/dl-op-linalg-mathexp-benchmark.html index 05136028..5781c5ea 100644 --- a/site/deeplearning/dl-op-linalg-mathexp-benchmark.html +++ b/site/deeplearning/dl-op-linalg-mathexp-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-mathexp-benchmark.json

            2025-06-01 11:25:55 UTC

            +

            deeplearning/dl-op-linalg-mathexp-benchmark.json

            2025-06-01 11:31:25 UTC

            dl-op-linalg-mathexp-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/scalar/iterations:57.277.275
            diff --git a/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html b/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html index 05a3763b..274d6e65 100644 --- a/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html +++ b/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-mathfpow-benchmark.json

            2025-06-01 11:25:55 UTC

            +

            deeplearning/dl-op-linalg-mathfpow-benchmark.json

            2025-06-01 11:31:25 UTC

            dl-op-linalg-mathfpow-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_EXP_SCALAR0.04560.045615,072
            diff --git a/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html b/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html index a224dfc9..9a01f033 100644 --- a/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html +++ b/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-mathrsqrt-benchmark.json

            2025-06-01 11:25:55 UTC

            +

            deeplearning/dl-op-linalg-mathrsqrt-benchmark.json

            2025-06-01 11:31:25 UTC

            dl-op-linalg-mathrsqrt-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_FPOW_SCALAR0.08520.08528,120
            diff --git a/site/deeplearning/dl-op-linalg-matmul-benchmark.html b/site/deeplearning/dl-op-linalg-matmul-benchmark.html index 704cdb85..f4ae5d20 100644 --- a/site/deeplearning/dl-op-linalg-matmul-benchmark.html +++ b/site/deeplearning/dl-op-linalg-matmul-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-matmul-benchmark.json

            2025-06-01 11:25:55 UTC

            +

            deeplearning/dl-op-linalg-matmul-benchmark.json

            2025-06-01 11:31:25 UTC

            dl-op-linalg-matmul-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_RSQRT_SCALAR0.07280.07289,624
            diff --git a/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html b/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html index e8210027..9187a0cf 100644 --- a/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html +++ b/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.json

            2025-06-01 11:25:55 UTC

            +

            deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.json

            2025-06-01 11:31:25 UTC

            dl-op-linalg-pooling-nhwc-sum-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_MATMUL/scalar_O0/iterations:14.1e+034.1e+031
            diff --git a/site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html b/site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html index 7365cb77..1c871bfe 100644 --- a/site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-reduceaddf-benchmark.json

            2025-06-01 11:25:55 UTC

            +

            deeplearning/dl-op-linalg-reduceaddf-benchmark.json

            2025-06-01 11:31:25 UTC

            ⚠ FAILED: JSON parse error: Expecting value
            Console output
            2025-06-01T09:44:07+00:00
            diff --git a/site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html b/site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html
            index 2e24b371..df8269c6 100644
            --- a/site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html
            +++ b/site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html
            @@ -10,7 +10,7 @@
             .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem}
             
             
            -

            deeplearning/dl-op-linalg-reducemaxf-benchmark.json

            2025-06-01 11:25:55 UTC

            +

            deeplearning/dl-op-linalg-reducemaxf-benchmark.json

            2025-06-01 11:31:25 UTC

            ⚠ FAILED: JSON parse error: Expecting value
            Console output
            2025-06-01T09:44:07+00:00
            diff --git a/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html b/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html
            index 542e06a1..8bcc67e9 100644
            --- a/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html
            +++ b/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html
            @@ -10,7 +10,7 @@
             .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem}
             
             
            -

            deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.json

            2025-06-01 11:25:55 UTC

            +

            deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.json

            2025-06-01 11:31:25 UTC

            dl-op-linalg-softmax-exp-sum-div-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_POOLING_NHWC_SUM_SCALAR0.2330.2332,993
            diff --git a/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html b/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html index cff69bff..2e02ae85 100644 --- a/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html +++ b/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-matmul-transpose-b-benchmark.json

            2025-06-01 11:25:55 UTC

            +

            deeplearning/dl-op-matmul-transpose-b-benchmark.json

            2025-06-01 11:31:25 UTC

            dl-op-matmul-transpose-b-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_SOFTMAXEXPSUMDIV_SCALAR0.005590.00559123,186
            diff --git a/site/deeplearning/dl-op-tosa-transpose-benchmark.html b/site/deeplearning/dl-op-tosa-transpose-benchmark.html index bd680496..bfcf69c0 100644 --- a/site/deeplearning/dl-op-tosa-transpose-benchmark.html +++ b/site/deeplearning/dl-op-tosa-transpose-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-tosa-transpose-benchmark.json

            2025-06-01 11:25:55 UTC

            +

            deeplearning/dl-op-tosa-transpose-benchmark.json

            2025-06-01 11:31:25 UTC

            dl-op-tosa-transpose-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_MATMUL_TRANSPOSE_B/scalar_O0/iterations:51.05e+031.04e+035
            diff --git a/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 9e11fb3c..cb55eb48 100644 --- a/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:25:55 UTC

            +

            imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:31:25 UTC

            AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_TRANSPOSE_2D/scalar_O0/iterations:524.917.75
            diff --git a/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index b95fcba1..68a7fad1 100644 --- a/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:25:55 UTC

            +

            imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:31:25 UTC

            AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.884.88143
            diff --git a/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 690e30a5..792094a2 100644 --- a/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:25:55 UTC

            +

            imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:31:25 UTC

            AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.884.88143
            diff --git a/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 87e89217..326ef91c 100644 --- a/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:25:55 UTC

            +

            imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:31:25 UTC

            AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.511.561
            diff --git a/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index c2cca69b..490de1f3 100644 --- a/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:25:55 UTC

            +

            imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:31:25 UTC

            AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.611.661
            diff --git a/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 5f348f7c..33cdc9b8 100644 --- a/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:25:55 UTC

            +

            imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:31:25 UTC

            AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.874.87144
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 44c601be..882ca6d0 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:25:55 UTC

            +

            imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:31:25 UTC

            AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.94.9144
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index e29c4bd4..2d7d2e10 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:25:55 UTC

            +

            imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:31:25 UTC

            AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.774.77147
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index a34ad429..a51705f4 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:25:55 UTC

            +

            imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:31:25 UTC

            AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.914.91143
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index a3e66687..06314896 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:25:55 UTC

            +

            imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:31:25 UTC

            AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.611.660
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index e2e3bfe3..4054f706 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:25:55 UTC

            +

            imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:31:25 UTC

            AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.511.560
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index bf09a529..d4d9da1c 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:25:55 UTC

            +

            imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:31:25 UTC

            AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/121.521.531
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 8948200d..642fcd64 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:25:55 UTC

            +

            imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:31:25 UTC

            AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/121.621.632
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index c6fe9af7..709721cb 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:25:55 UTC

            +

            imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:31:25 UTC

            AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/134.234.221
            diff --git a/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index cd223503..c580554e 100644 --- a/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:25:55 UTC

            +

            imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:31:25 UTC

            SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/134.334.320
            diff --git a/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 8b7ee241..2ec0383f 100644 --- a/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:25:55 UTC

            +

            imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:31:25 UTC

            SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.744.74148
            diff --git a/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index d2cf13c5..aa7f95f5 100644 --- a/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:25:55 UTC

            +

            imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:31:25 UTC

            SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.884.88144
            diff --git a/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index c0d8c72d..5c691102 100644 --- a/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:25:55 UTC

            +

            imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:31:25 UTC

            SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.511.560
            diff --git a/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index c032dc5f..cb2e6fc1 100644 --- a/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:25:55 UTC

            +

            imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:31:25 UTC

            SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.611.661
            diff --git a/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 0b3cd4d7..935a5b13 100644 --- a/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:25:55 UTC

            +

            imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:31:25 UTC

            SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/15.035.03139
            diff --git a/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 57b11354..93288e50 100644 --- a/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:25:55 UTC

            +

            imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:31:25 UTC

            SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.894.89143
            diff --git a/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index ac2937d1..c68000ae 100644 --- a/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:25:55 UTC

            +

            imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:31:25 UTC

            SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.894.89143
            diff --git a/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 5b3d2342..9326eeb2 100644 --- a/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:25:55 UTC

            +

            imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:31:25 UTC

            SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.874.87144
            diff --git a/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index d7feb33b..8a87de52 100644 --- a/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:25:55 UTC

            +

            imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:31:25 UTC

            SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.511.561
            diff --git a/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 8ccec683..bb6c5a6a 100644 --- a/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:25:55 UTC

            +

            imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:31:25 UTC

            SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.611.661
            diff --git a/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 387dfb35..7feed3bd 100644 --- a/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:25:55 UTC

            +

            imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:31:25 UTC

            SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/121.621.631
            diff --git a/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 06a78bc4..2629842e 100644 --- a/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:25:55 UTC

            +

            imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:31:25 UTC

            SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/121.521.532
            diff --git a/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 9a69da86..8faec2d5 100644 --- a/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:25:55 UTC

            +

            imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:31:25 UTC

            SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/141.541.517
            diff --git a/site/vectorization/vectorization_matrix.html b/site/vectorization/vectorization_matrix.html index bcce3b1d..74e4d8e9 100644 --- a/site/vectorization/vectorization_matrix.html +++ b/site/vectorization/vectorization_matrix.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            vectorization/vectorization_matrix.json

            2025-06-01 11:25:55 UTC

            +

            vectorization/vectorization_matrix.json

            2025-06-01 11:31:25 UTC

            vectorization_matrix.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/134.234.220
            From 85e2e9ff2ab77016d9e830075b9caf2ad4a245d1 Mon Sep 17 00:00:00 2001 From: Q Liu <52538137+LIUQyou@users.noreply.github.com> Date: Sun, 1 Jun 2025 13:45:27 +0200 Subject: [PATCH 27/52] update the script. --- .github/workflows/bench.yml | 4 ++-- site/benchmarks/latest/index.html | 1 + site/deeplearning/dl-layer-ffn-benchmark.html | 2 +- site/deeplearning/dl-layer-rmsnorm-benchmark.html | 2 +- site/deeplearning/dl-layer-selfattention-benchmark.html | 2 +- site/deeplearning/dl-model-lenet-benchmark.html | 2 +- site/deeplearning/dl-model-mobilenetv3-benchmark.html | 2 +- site/deeplearning/dl-model-resnet18-benchmark.html | 2 +- site/deeplearning/dl-model-tinyllama-benchmark.html | 2 +- site/deeplearning/dl-model-whisper-benchmark.html | 2 +- site/deeplearning/dl-op-linalg-arithaddf-benchmark.html | 2 +- site/deeplearning/dl-op-linalg-arithdivf-benchmark.html | 2 +- site/deeplearning/dl-op-linalg-arithmulf-benchmark.html | 2 +- site/deeplearning/dl-op-linalg-arithnegf-benchmark.html | 2 +- site/deeplearning/dl-op-linalg-arithsubf-benchmark.html | 2 +- site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html | 2 +- .../deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html | 2 +- .../deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html | 2 +- .../deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html | 2 +- .../dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html | 2 +- site/deeplearning/dl-op-linalg-mathexp-benchmark.html | 2 +- site/deeplearning/dl-op-linalg-mathfpow-benchmark.html | 2 +- site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html | 2 +- site/deeplearning/dl-op-linalg-matmul-benchmark.html | 2 +- .../deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html | 2 +- site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html | 2 +- site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html | 2 +- .../dl-op-linalg-softmax-exp-sum-div-benchmark.html | 2 +- site/deeplearning/dl-op-matmul-transpose-b-benchmark.html | 2 +- 
site/deeplearning/dl-op-tosa-transpose-benchmark.html | 2 +- ...nKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...gKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...tKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...nKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...gKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...tKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- 
site/vectorization/vectorization_matrix.html | 2 +- 59 files changed, 60 insertions(+), 59 deletions(-) create mode 100644 site/benchmarks/latest/index.html diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml index 5c40e563..d4032eb8 100755 --- a/.github/workflows/bench.yml +++ b/.github/workflows/bench.yml @@ -75,13 +75,13 @@ jobs: - name: Push to buddy-compiler.github.io uses: peaceiris/actions-gh-pages@v4 - if: github.event_name == 'push' && github.ref == 'refs/heads/main' + if: github.event_name == 'push' && github.ref == 'refs/heads/master' with: personal_token: ${{ secrets.BUDDY_SITE_PAT }} external_repository: buddy-compiler/buddy-compiler.github.io publish_dir: /home/quliu/buddy-complier-workspace/buddy-benchmark/site destination_dir: benchmarks/${{ github.sha }} - publish_branch: main + publish_branch: master keep_files: true commit_message: Deploy benchmark results for ${{ github.sha }} (from ${{ github.repository }}) diff --git a/site/benchmarks/latest/index.html b/site/benchmarks/latest/index.html new file mode 100644 index 00000000..929219d8 --- /dev/null +++ b/site/benchmarks/latest/index.html @@ -0,0 +1 @@ + diff --git a/site/deeplearning/dl-layer-ffn-benchmark.html b/site/deeplearning/dl-layer-ffn-benchmark.html index 58795aff..84a27ead 100644 --- a/site/deeplearning/dl-layer-ffn-benchmark.html +++ b/site/deeplearning/dl-layer-ffn-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-layer-ffn-benchmark.json

            2025-06-01 11:31:25 UTC

            +

            deeplearning/dl-layer-ffn-benchmark.json

            2025-06-01 11:43:19 UTC

            dl-layer-ffn-benchmark.json

            NameTime (ns)CPU (ns)Iterations
            MLIR_MatMul/118.818.837,302,822
            diff --git a/site/deeplearning/dl-layer-rmsnorm-benchmark.html b/site/deeplearning/dl-layer-rmsnorm-benchmark.html index aa4775e9..38099142 100644 --- a/site/deeplearning/dl-layer-rmsnorm-benchmark.html +++ b/site/deeplearning/dl-layer-rmsnorm-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-layer-rmsnorm-benchmark.json

            2025-06-01 11:31:25 UTC

            +

            deeplearning/dl-layer-rmsnorm-benchmark.json

            2025-06-01 11:43:19 UTC

            dl-layer-rmsnorm-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_LAYER_FFN/Scalar0.06530.065310,788
            diff --git a/site/deeplearning/dl-layer-selfattention-benchmark.html b/site/deeplearning/dl-layer-selfattention-benchmark.html index 2bde1442..7eae0006 100644 --- a/site/deeplearning/dl-layer-selfattention-benchmark.html +++ b/site/deeplearning/dl-layer-selfattention-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-layer-selfattention-benchmark.json

            2025-06-01 11:31:25 UTC

            +

            deeplearning/dl-layer-selfattention-benchmark.json

            2025-06-01 11:43:19 UTC

            dl-layer-selfattention-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_LAYER_RMSNORM/Scalar0.001960.00196355,522
            diff --git a/site/deeplearning/dl-model-lenet-benchmark.html b/site/deeplearning/dl-model-lenet-benchmark.html index 9ece5921..c8dddad5 100644 --- a/site/deeplearning/dl-model-lenet-benchmark.html +++ b/site/deeplearning/dl-model-lenet-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-model-lenet-benchmark.json

            2025-06-01 11:31:25 UTC

            +

            deeplearning/dl-model-lenet-benchmark.json

            2025-06-01 11:43:19 UTC

            dl-model-lenet-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_LAYER_ATTENTION/Scalar4.694.69149
            diff --git a/site/deeplearning/dl-model-mobilenetv3-benchmark.html b/site/deeplearning/dl-model-mobilenetv3-benchmark.html index 2e9aeb1e..9085966d 100644 --- a/site/deeplearning/dl-model-mobilenetv3-benchmark.html +++ b/site/deeplearning/dl-model-mobilenetv3-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-model-mobilenetv3-benchmark.json

            2025-06-01 11:31:25 UTC

            +

            deeplearning/dl-model-mobilenetv3-benchmark.json

            2025-06-01 11:43:19 UTC

            dl-model-mobilenetv3-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_MODEL_LENET/Auto_Vectorization0.1610.1614,427
            diff --git a/site/deeplearning/dl-model-resnet18-benchmark.html b/site/deeplearning/dl-model-resnet18-benchmark.html index b0fdd15c..6f752b97 100644 --- a/site/deeplearning/dl-model-resnet18-benchmark.html +++ b/site/deeplearning/dl-model-resnet18-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-model-resnet18-benchmark.json

            2025-06-01 11:31:25 UTC

            +

            deeplearning/dl-model-resnet18-benchmark.json

            2025-06-01 11:43:19 UTC

            dl-model-resnet18-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_MobileNet_V3/BM_MobileNet_V3_scalar35.935.918
            diff --git a/site/deeplearning/dl-model-tinyllama-benchmark.html b/site/deeplearning/dl-model-tinyllama-benchmark.html index b066bcf7..9ad63e41 100644 --- a/site/deeplearning/dl-model-tinyllama-benchmark.html +++ b/site/deeplearning/dl-model-tinyllama-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-model-tinyllama-benchmark.json

            2025-06-01 11:31:25 UTC

            +

            deeplearning/dl-model-tinyllama-benchmark.json

            2025-06-01 11:43:19 UTC

            dl-model-tinyllama-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_MODEL_Resnet18/Auto_Vectorization7197181
            diff --git a/site/deeplearning/dl-model-whisper-benchmark.html b/site/deeplearning/dl-model-whisper-benchmark.html index 79e8111e..5ceddcd5 100644 --- a/site/deeplearning/dl-model-whisper-benchmark.html +++ b/site/deeplearning/dl-model-whisper-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-model-whisper-benchmark.json

            2025-06-01 11:31:25 UTC

            +

            deeplearning/dl-model-whisper-benchmark.json

            2025-06-01 11:43:19 UTC

            dl-model-whisper-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_MODEL_TINYLLAMA/scalar1.73e+051.73e+051
            diff --git a/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html b/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html index 539e6ce2..692dfeed 100644 --- a/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-arithaddf-benchmark.json

            2025-06-01 11:31:25 UTC

            +

            deeplearning/dl-op-linalg-arithaddf-benchmark.json

            2025-06-01 11:43:19 UTC

            dl-op-linalg-arithaddf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_MODEL_Whisper/Auto_Vectorization7.92e+047.92e+041
            diff --git a/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html b/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html index aef5e48c..23d4dc5f 100644 --- a/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-arithdivf-benchmark.json

            2025-06-01 11:31:25 UTC

            +

            deeplearning/dl-op-linalg-arithdivf-benchmark.json

            2025-06-01 11:43:19 UTC

            dl-op-linalg-arithdivf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_ADDF_SCALAR0.02990.029923,357
            diff --git a/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html b/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html index 8fcc466e..19d41836 100644 --- a/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-arithmulf-benchmark.json

            2025-06-01 11:31:25 UTC

            +

            deeplearning/dl-op-linalg-arithmulf-benchmark.json

            2025-06-01 11:43:19 UTC

            dl-op-linalg-arithmulf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_DIVF_SCALAR0.02930.029323,918
            diff --git a/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html b/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html index 53a1e59f..ac2347ed 100644 --- a/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-arithnegf-benchmark.json

            2025-06-01 11:31:25 UTC

            +

            deeplearning/dl-op-linalg-arithnegf-benchmark.json

            2025-06-01 11:43:19 UTC

            dl-op-linalg-arithnegf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_MULF_SCALAR0.02960.029623,548
            diff --git a/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html b/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html index e8bcdd82..f96ac10a 100644 --- a/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-arithsubf-benchmark.json

            2025-06-01 11:31:25 UTC

            +

            deeplearning/dl-op-linalg-arithsubf-benchmark.json

            2025-06-01 11:43:19 UTC

            dl-op-linalg-arithsubf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_NEGF_SCALAR0.02280.022830,522
            diff --git a/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html b/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html index 18a3e4cd..0cbdfae1 100644 --- a/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html +++ b/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-batch-matmul-benchmark.json

            2025-06-01 11:31:25 UTC

            +

            deeplearning/dl-op-linalg-batch-matmul-benchmark.json

            2025-06-01 11:43:19 UTC

            dl-op-linalg-batch-matmul-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_SUBF_SCALAR0.02930.029323,583
            diff --git a/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html b/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html index 2291ccea..8600d9b4 100644 --- a/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html +++ b/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.json

            2025-06-01 11:31:25 UTC

            +

            deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.json

            2025-06-01 11:43:19 UTC

            dl-op-linalg-conv2d-nchw-fchw-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_BATCH_MATMUL/Scalar/iterations:13.53e+033.53e+031
            diff --git a/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html b/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html index 24b6c6d7..9a50cd1a 100644 --- a/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html +++ b/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json

            2025-06-01 11:31:25 UTC

            +

            deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json

            2025-06-01 11:43:19 UTC

            dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_Conv2DNchwFchw_SCALAR2832832
            diff --git a/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html b/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html index 5d068cb3..fe720ac0 100644 --- a/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json

            2025-06-01 11:31:25 UTC

            +

            deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json

            2025-06-01 11:43:19 UTC

            dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_CONV_2D_NHWC_FHWC/scalar/iterations:572.572.55
            diff --git a/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html b/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html index 136c0361..586abac5 100644 --- a/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html +++ b/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json

            2025-06-01 11:31:25 UTC

            +

            deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json

            2025-06-01 11:43:19 UTC

            dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_CONV_2D_NHWC_HWCF_SCALAR32.432.421
            diff --git a/site/deeplearning/dl-op-linalg-mathexp-benchmark.html b/site/deeplearning/dl-op-linalg-mathexp-benchmark.html index 5781c5ea..d4b5967b 100644 --- a/site/deeplearning/dl-op-linalg-mathexp-benchmark.html +++ b/site/deeplearning/dl-op-linalg-mathexp-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-mathexp-benchmark.json

            2025-06-01 11:31:25 UTC

            +

            deeplearning/dl-op-linalg-mathexp-benchmark.json

            2025-06-01 11:43:19 UTC

            dl-op-linalg-mathexp-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/scalar/iterations:57.277.275
            diff --git a/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html b/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html index 274d6e65..0a8c6623 100644 --- a/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html +++ b/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-mathfpow-benchmark.json

            2025-06-01 11:31:25 UTC

            +

            deeplearning/dl-op-linalg-mathfpow-benchmark.json

            2025-06-01 11:43:19 UTC

            dl-op-linalg-mathfpow-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_EXP_SCALAR0.04560.045615,072
            diff --git a/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html b/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html index 9a01f033..39aea529 100644 --- a/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html +++ b/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-mathrsqrt-benchmark.json

            2025-06-01 11:31:25 UTC

            +

            deeplearning/dl-op-linalg-mathrsqrt-benchmark.json

            2025-06-01 11:43:19 UTC

            dl-op-linalg-mathrsqrt-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_FPOW_SCALAR0.08520.08528,120
            diff --git a/site/deeplearning/dl-op-linalg-matmul-benchmark.html b/site/deeplearning/dl-op-linalg-matmul-benchmark.html index f4ae5d20..1aeacd74 100644 --- a/site/deeplearning/dl-op-linalg-matmul-benchmark.html +++ b/site/deeplearning/dl-op-linalg-matmul-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-matmul-benchmark.json

            2025-06-01 11:31:25 UTC

            +

            deeplearning/dl-op-linalg-matmul-benchmark.json

            2025-06-01 11:43:19 UTC

            dl-op-linalg-matmul-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_RSQRT_SCALAR0.07280.07289,624
            diff --git a/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html b/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html index 9187a0cf..aa81c13b 100644 --- a/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html +++ b/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.json

            2025-06-01 11:31:25 UTC

            +

            deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.json

            2025-06-01 11:43:19 UTC

            dl-op-linalg-pooling-nhwc-sum-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_MATMUL/scalar_O0/iterations:14.1e+034.1e+031
            diff --git a/site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html b/site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html index 1c871bfe..f3353f49 100644 --- a/site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-reduceaddf-benchmark.json

            2025-06-01 11:31:25 UTC

            +

            deeplearning/dl-op-linalg-reduceaddf-benchmark.json

            2025-06-01 11:43:19 UTC

            ⚠ FAILED: JSON parse error: Expecting value
            Console output
            2025-06-01T09:44:07+00:00
            diff --git a/site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html b/site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html
            index df8269c6..94ffbc43 100644
            --- a/site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html
            +++ b/site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html
            @@ -10,7 +10,7 @@
             .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem}
             
             
            -

            deeplearning/dl-op-linalg-reducemaxf-benchmark.json

            2025-06-01 11:31:25 UTC

            +

            deeplearning/dl-op-linalg-reducemaxf-benchmark.json

            2025-06-01 11:43:19 UTC

            ⚠ FAILED: JSON parse error: Expecting value
            Console output
            2025-06-01T09:44:07+00:00
            diff --git a/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html b/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html
            index 8bcc67e9..e428e2ad 100644
            --- a/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html
            +++ b/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html
            @@ -10,7 +10,7 @@
             .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem}
             
             
            -

            deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.json

            2025-06-01 11:31:25 UTC

            +

            deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.json

            2025-06-01 11:43:19 UTC

            dl-op-linalg-softmax-exp-sum-div-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_POOLING_NHWC_SUM_SCALAR0.2330.2332,993
            diff --git a/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html b/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html index 2e02ae85..dd833136 100644 --- a/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html +++ b/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-matmul-transpose-b-benchmark.json

            2025-06-01 11:31:25 UTC

            +

            deeplearning/dl-op-matmul-transpose-b-benchmark.json

            2025-06-01 11:43:19 UTC

            dl-op-matmul-transpose-b-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_SOFTMAXEXPSUMDIV_SCALAR0.005590.00559123,186
            diff --git a/site/deeplearning/dl-op-tosa-transpose-benchmark.html b/site/deeplearning/dl-op-tosa-transpose-benchmark.html index bfcf69c0..db2aa6aa 100644 --- a/site/deeplearning/dl-op-tosa-transpose-benchmark.html +++ b/site/deeplearning/dl-op-tosa-transpose-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-tosa-transpose-benchmark.json

            2025-06-01 11:31:25 UTC

            +

            deeplearning/dl-op-tosa-transpose-benchmark.json

            2025-06-01 11:43:19 UTC

            dl-op-tosa-transpose-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_MATMUL_TRANSPOSE_B/scalar_O0/iterations:51.05e+031.04e+035
            diff --git a/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index cb55eb48..b33cd851 100644 --- a/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:31:25 UTC

            +

            imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:43:19 UTC

            AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_TRANSPOSE_2D/scalar_O0/iterations:524.917.75
            diff --git a/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 68a7fad1..0422f476 100644 --- a/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:31:25 UTC

            +

            imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:43:19 UTC

            AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.884.88143
            diff --git a/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 792094a2..e73939c8 100644 --- a/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:31:25 UTC

            +

            imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:43:19 UTC

            AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.884.88143
            diff --git a/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 326ef91c..30ce87b5 100644 --- a/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:31:25 UTC

            +

            imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:43:19 UTC

            AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.511.561
            diff --git a/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 490de1f3..0e7d7084 100644 --- a/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:31:25 UTC

            +

            imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:43:19 UTC

            AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.611.661
            diff --git a/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 33cdc9b8..3d916ff6 100644 --- a/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:31:25 UTC

            +

            imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:43:19 UTC

            AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.874.87144
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 882ca6d0..c3d7a107 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:31:25 UTC

            +

            imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:43:19 UTC

            AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.94.9144
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 2d7d2e10..2edc917d 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:31:25 UTC

            +

            imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:43:19 UTC

            AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.774.77147
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index a51705f4..8d8c9e13 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:31:25 UTC

            +

            imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:43:19 UTC

            AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.914.91143
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 06314896..154f7e83 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:31:25 UTC

            +

            imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:43:19 UTC

            AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.611.660
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 4054f706..b99bc1cc 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:31:25 UTC

            +

            imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:43:19 UTC

            AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.511.560
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index d4d9da1c..04deb629 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:31:25 UTC

            +

            imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:43:19 UTC

            AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/121.521.531
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 642fcd64..3e7c5841 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:31:25 UTC

            +

            imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:43:19 UTC

            AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/121.621.632
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 709721cb..7008d7a8 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:31:25 UTC

            +

            imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:43:19 UTC

            AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/134.234.221
            diff --git a/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index c580554e..ec9c12ab 100644 --- a/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:31:25 UTC

            +

            imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:43:19 UTC

            SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/134.334.320
            diff --git a/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 2ec0383f..cdb0da34 100644 --- a/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:31:25 UTC

            +

            imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:43:19 UTC

            SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.744.74148
            diff --git a/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index aa7f95f5..9fc899e7 100644 --- a/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:31:25 UTC

            +

            imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:43:19 UTC

            SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.884.88144
            diff --git a/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 5c691102..7447104b 100644 --- a/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:31:25 UTC

            +

            imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:43:19 UTC

            SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.511.560
            diff --git a/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index cb2e6fc1..58251912 100644 --- a/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:31:25 UTC

            +

            imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:43:19 UTC

            SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.611.661
            diff --git a/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 935a5b13..a789fcdd 100644 --- a/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:31:25 UTC

            +

            imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:43:19 UTC

            SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/15.035.03139
            diff --git a/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 93288e50..f3972ee3 100644 --- a/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:31:25 UTC

            +

            imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:43:19 UTC

            SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.894.89143
            diff --git a/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index c68000ae..45dbe5be 100644 --- a/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:31:25 UTC

            +

            imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:43:19 UTC

            SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.894.89143
            diff --git a/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 9326eeb2..3310768d 100644 --- a/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:31:25 UTC

            +

            imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:43:19 UTC

            SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.874.87144
            diff --git a/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 8a87de52..df7c12ce 100644 --- a/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:31:25 UTC

            +

            imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:43:19 UTC

            SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.511.561
            diff --git a/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index bb6c5a6a..db1d0307 100644 --- a/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:31:25 UTC

            +

            imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:43:19 UTC

            SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.611.661
            diff --git a/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 7feed3bd..4dfa3767 100644 --- a/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:31:25 UTC

            +

            imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:43:19 UTC

            SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/121.621.631
            diff --git a/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 2629842e..edbebbff 100644 --- a/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:31:25 UTC

            +

            imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:43:19 UTC

            SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/121.521.532
            diff --git a/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 8faec2d5..55654891 100644 --- a/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:31:25 UTC

            +

            imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:43:19 UTC

            SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/141.541.517
            diff --git a/site/vectorization/vectorization_matrix.html b/site/vectorization/vectorization_matrix.html index 74e4d8e9..5232ffdd 100644 --- a/site/vectorization/vectorization_matrix.html +++ b/site/vectorization/vectorization_matrix.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            vectorization/vectorization_matrix.json

            2025-06-01 11:31:25 UTC

            +

            vectorization/vectorization_matrix.json

            2025-06-01 11:43:19 UTC

            vectorization_matrix.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/134.234.220
            From 2399a4df6fd5a034f93c967ff9ff0554f008320b Mon Sep 17 00:00:00 2001 From: Q Liu <52538137+LIUQyou@users.noreply.github.com> Date: Sun, 1 Jun 2025 13:51:04 +0200 Subject: [PATCH 28/52] update the script. --- .github/workflows/bench.yml | 2 +- site/deeplearning/dl-layer-ffn-benchmark.html | 2 +- site/deeplearning/dl-layer-rmsnorm-benchmark.html | 2 +- site/deeplearning/dl-layer-selfattention-benchmark.html | 2 +- site/deeplearning/dl-model-lenet-benchmark.html | 2 +- site/deeplearning/dl-model-mobilenetv3-benchmark.html | 2 +- site/deeplearning/dl-model-resnet18-benchmark.html | 2 +- site/deeplearning/dl-model-tinyllama-benchmark.html | 2 +- site/deeplearning/dl-model-whisper-benchmark.html | 2 +- site/deeplearning/dl-op-linalg-arithaddf-benchmark.html | 2 +- site/deeplearning/dl-op-linalg-arithdivf-benchmark.html | 2 +- site/deeplearning/dl-op-linalg-arithmulf-benchmark.html | 2 +- site/deeplearning/dl-op-linalg-arithnegf-benchmark.html | 2 +- site/deeplearning/dl-op-linalg-arithsubf-benchmark.html | 2 +- site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html | 2 +- site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html | 2 +- site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html | 2 +- site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html | 2 +- .../dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html | 2 +- site/deeplearning/dl-op-linalg-mathexp-benchmark.html | 2 +- site/deeplearning/dl-op-linalg-mathfpow-benchmark.html | 2 +- site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html | 2 +- site/deeplearning/dl-op-linalg-matmul-benchmark.html | 2 +- site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html | 2 +- site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html | 2 +- site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html | 2 +- .../dl-op-linalg-softmax-exp-sum-div-benchmark.html | 2 +- site/deeplearning/dl-op-matmul-transpose-b-benchmark.html | 2 +- site/deeplearning/dl-op-tosa-transpose-benchmark.html 
| 2 +- ...ianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...anKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...ogKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...ittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...ttKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...ianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...anKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...ogKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...ittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...ttKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- 
site/vectorization/vectorization_matrix.html | 2 +- 58 files changed, 58 insertions(+), 58 deletions(-) diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml index d4032eb8..c1df8c03 100755 --- a/.github/workflows/bench.yml +++ b/.github/workflows/bench.yml @@ -75,7 +75,7 @@ jobs: - name: Push to buddy-compiler.github.io uses: peaceiris/actions-gh-pages@v4 - if: github.event_name == 'push' && github.ref == 'refs/heads/master' + if: github.event_name == 'push' && github.ref == 'refs/heads/main' with: personal_token: ${{ secrets.BUDDY_SITE_PAT }} external_repository: buddy-compiler/buddy-compiler.github.io diff --git a/site/deeplearning/dl-layer-ffn-benchmark.html b/site/deeplearning/dl-layer-ffn-benchmark.html index 84a27ead..99ac4904 100644 --- a/site/deeplearning/dl-layer-ffn-benchmark.html +++ b/site/deeplearning/dl-layer-ffn-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-layer-ffn-benchmark.json

            2025-06-01 11:43:19 UTC

            +

            deeplearning/dl-layer-ffn-benchmark.json

            2025-06-01 11:45:42 UTC

            dl-layer-ffn-benchmark.json

            NameTime (ns)CPU (ns)Iterations
            MLIR_MatMul/118.818.837,302,822
            diff --git a/site/deeplearning/dl-layer-rmsnorm-benchmark.html b/site/deeplearning/dl-layer-rmsnorm-benchmark.html index 38099142..be34ccc6 100644 --- a/site/deeplearning/dl-layer-rmsnorm-benchmark.html +++ b/site/deeplearning/dl-layer-rmsnorm-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-layer-rmsnorm-benchmark.json

            2025-06-01 11:43:19 UTC

            +

            deeplearning/dl-layer-rmsnorm-benchmark.json

            2025-06-01 11:45:42 UTC

            dl-layer-rmsnorm-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_LAYER_FFN/Scalar0.06530.065310,788
            diff --git a/site/deeplearning/dl-layer-selfattention-benchmark.html b/site/deeplearning/dl-layer-selfattention-benchmark.html index 7eae0006..c0c324f9 100644 --- a/site/deeplearning/dl-layer-selfattention-benchmark.html +++ b/site/deeplearning/dl-layer-selfattention-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-layer-selfattention-benchmark.json

            2025-06-01 11:43:19 UTC

            +

            deeplearning/dl-layer-selfattention-benchmark.json

            2025-06-01 11:45:42 UTC

            dl-layer-selfattention-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_LAYER_RMSNORM/Scalar0.001960.00196355,522
            diff --git a/site/deeplearning/dl-model-lenet-benchmark.html b/site/deeplearning/dl-model-lenet-benchmark.html index c8dddad5..9f971496 100644 --- a/site/deeplearning/dl-model-lenet-benchmark.html +++ b/site/deeplearning/dl-model-lenet-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-model-lenet-benchmark.json

            2025-06-01 11:43:19 UTC

            +

            deeplearning/dl-model-lenet-benchmark.json

            2025-06-01 11:45:42 UTC

            dl-model-lenet-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_LAYER_ATTENTION/Scalar4.694.69149
            diff --git a/site/deeplearning/dl-model-mobilenetv3-benchmark.html b/site/deeplearning/dl-model-mobilenetv3-benchmark.html index 9085966d..6e5bba41 100644 --- a/site/deeplearning/dl-model-mobilenetv3-benchmark.html +++ b/site/deeplearning/dl-model-mobilenetv3-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-model-mobilenetv3-benchmark.json

            2025-06-01 11:43:19 UTC

            +

            deeplearning/dl-model-mobilenetv3-benchmark.json

            2025-06-01 11:45:42 UTC

            dl-model-mobilenetv3-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_MODEL_LENET/Auto_Vectorization0.1610.1614,427
            diff --git a/site/deeplearning/dl-model-resnet18-benchmark.html b/site/deeplearning/dl-model-resnet18-benchmark.html index 6f752b97..0923bb0d 100644 --- a/site/deeplearning/dl-model-resnet18-benchmark.html +++ b/site/deeplearning/dl-model-resnet18-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-model-resnet18-benchmark.json

            2025-06-01 11:43:19 UTC

            +

            deeplearning/dl-model-resnet18-benchmark.json

            2025-06-01 11:45:42 UTC

            dl-model-resnet18-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_MobileNet_V3/BM_MobileNet_V3_scalar35.935.918
            diff --git a/site/deeplearning/dl-model-tinyllama-benchmark.html b/site/deeplearning/dl-model-tinyllama-benchmark.html index 9ad63e41..75a4f72d 100644 --- a/site/deeplearning/dl-model-tinyllama-benchmark.html +++ b/site/deeplearning/dl-model-tinyllama-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-model-tinyllama-benchmark.json

            2025-06-01 11:43:19 UTC

            +

            deeplearning/dl-model-tinyllama-benchmark.json

            2025-06-01 11:45:42 UTC

            dl-model-tinyllama-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_MODEL_Resnet18/Auto_Vectorization7197181
            diff --git a/site/deeplearning/dl-model-whisper-benchmark.html b/site/deeplearning/dl-model-whisper-benchmark.html index 5ceddcd5..bf33823b 100644 --- a/site/deeplearning/dl-model-whisper-benchmark.html +++ b/site/deeplearning/dl-model-whisper-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-model-whisper-benchmark.json

            2025-06-01 11:43:19 UTC

            +

            deeplearning/dl-model-whisper-benchmark.json

            2025-06-01 11:45:42 UTC

            dl-model-whisper-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_MODEL_TINYLLAMA/scalar1.73e+051.73e+051
            diff --git a/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html b/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html index 692dfeed..80ab548b 100644 --- a/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-arithaddf-benchmark.json

            2025-06-01 11:43:19 UTC

            +

            deeplearning/dl-op-linalg-arithaddf-benchmark.json

            2025-06-01 11:45:42 UTC

            dl-op-linalg-arithaddf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_MODEL_Whisper/Auto_Vectorization7.92e+047.92e+041
            diff --git a/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html b/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html index 23d4dc5f..0aab870f 100644 --- a/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-arithdivf-benchmark.json

            2025-06-01 11:43:19 UTC

            +

            deeplearning/dl-op-linalg-arithdivf-benchmark.json

            2025-06-01 11:45:42 UTC

            dl-op-linalg-arithdivf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_ADDF_SCALAR0.02990.029923,357
            diff --git a/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html b/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html index 19d41836..20d68663 100644 --- a/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-arithmulf-benchmark.json

            2025-06-01 11:43:19 UTC

            +

            deeplearning/dl-op-linalg-arithmulf-benchmark.json

            2025-06-01 11:45:42 UTC

            dl-op-linalg-arithmulf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_DIVF_SCALAR0.02930.029323,918
            diff --git a/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html b/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html index ac2347ed..bdaee9fa 100644 --- a/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-arithnegf-benchmark.json

            2025-06-01 11:43:19 UTC

            +

            deeplearning/dl-op-linalg-arithnegf-benchmark.json

            2025-06-01 11:45:42 UTC

            dl-op-linalg-arithnegf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_MULF_SCALAR0.02960.029623,548
            diff --git a/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html b/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html index f96ac10a..b66af590 100644 --- a/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-arithsubf-benchmark.json

            2025-06-01 11:43:19 UTC

            +

            deeplearning/dl-op-linalg-arithsubf-benchmark.json

            2025-06-01 11:45:42 UTC

            dl-op-linalg-arithsubf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_NEGF_SCALAR0.02280.022830,522
            diff --git a/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html b/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html index 0cbdfae1..1d80d99b 100644 --- a/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html +++ b/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-batch-matmul-benchmark.json

            2025-06-01 11:43:19 UTC

            +

            deeplearning/dl-op-linalg-batch-matmul-benchmark.json

            2025-06-01 11:45:42 UTC

            dl-op-linalg-batch-matmul-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_SUBF_SCALAR0.02930.029323,583
            diff --git a/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html b/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html index 8600d9b4..b2a66e34 100644 --- a/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html +++ b/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.json

            2025-06-01 11:43:19 UTC

            +

            deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.json

            2025-06-01 11:45:42 UTC

            dl-op-linalg-conv2d-nchw-fchw-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_BATCH_MATMUL/Scalar/iterations:13.53e+033.53e+031
            diff --git a/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html b/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html index 9a50cd1a..cdfd4051 100644 --- a/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html +++ b/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json

            2025-06-01 11:43:19 UTC

            +

            deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json

            2025-06-01 11:45:42 UTC

            dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_Conv2DNchwFchw_SCALAR2832832
            diff --git a/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html b/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html index fe720ac0..80e1b4f4 100644 --- a/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json

            2025-06-01 11:43:19 UTC

            +

            deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json

            2025-06-01 11:45:42 UTC

            dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_CONV_2D_NHWC_FHWC/scalar/iterations:572.572.55
            diff --git a/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html b/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html index 586abac5..a2cec907 100644 --- a/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html +++ b/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json

            2025-06-01 11:43:19 UTC

            +

            deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json

            2025-06-01 11:45:42 UTC

            dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_CONV_2D_NHWC_HWCF_SCALAR32.432.421
            diff --git a/site/deeplearning/dl-op-linalg-mathexp-benchmark.html b/site/deeplearning/dl-op-linalg-mathexp-benchmark.html index d4b5967b..643bbfe3 100644 --- a/site/deeplearning/dl-op-linalg-mathexp-benchmark.html +++ b/site/deeplearning/dl-op-linalg-mathexp-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-mathexp-benchmark.json

            2025-06-01 11:43:19 UTC

            +

            deeplearning/dl-op-linalg-mathexp-benchmark.json

            2025-06-01 11:45:42 UTC

            dl-op-linalg-mathexp-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/scalar/iterations:57.277.275
            diff --git a/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html b/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html index 0a8c6623..b41ad789 100644 --- a/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html +++ b/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-mathfpow-benchmark.json

            2025-06-01 11:43:19 UTC

            +

            deeplearning/dl-op-linalg-mathfpow-benchmark.json

            2025-06-01 11:45:42 UTC

            dl-op-linalg-mathfpow-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_EXP_SCALAR0.04560.045615,072
            diff --git a/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html b/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html index 39aea529..0295ec0e 100644 --- a/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html +++ b/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-mathrsqrt-benchmark.json

            2025-06-01 11:43:19 UTC

            +

            deeplearning/dl-op-linalg-mathrsqrt-benchmark.json

            2025-06-01 11:45:42 UTC

            dl-op-linalg-mathrsqrt-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_FPOW_SCALAR0.08520.08528,120
            diff --git a/site/deeplearning/dl-op-linalg-matmul-benchmark.html b/site/deeplearning/dl-op-linalg-matmul-benchmark.html index 1aeacd74..bbb20831 100644 --- a/site/deeplearning/dl-op-linalg-matmul-benchmark.html +++ b/site/deeplearning/dl-op-linalg-matmul-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-matmul-benchmark.json

            2025-06-01 11:43:19 UTC

            +

            deeplearning/dl-op-linalg-matmul-benchmark.json

            2025-06-01 11:45:42 UTC

            dl-op-linalg-matmul-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_RSQRT_SCALAR0.07280.07289,624
            diff --git a/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html b/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html index aa81c13b..4cb7430d 100644 --- a/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html +++ b/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.json

            2025-06-01 11:43:19 UTC

            +

            deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.json

            2025-06-01 11:45:42 UTC

            dl-op-linalg-pooling-nhwc-sum-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_MATMUL/scalar_O0/iterations:14.1e+034.1e+031
            diff --git a/site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html b/site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html index f3353f49..640d0b48 100644 --- a/site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-reduceaddf-benchmark.json

            2025-06-01 11:43:19 UTC

            +

            deeplearning/dl-op-linalg-reduceaddf-benchmark.json

            2025-06-01 11:45:42 UTC

            ⚠ FAILED: JSON parse error: Expecting value
            Console output
            2025-06-01T09:44:07+00:00
            diff --git a/site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html b/site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html
            index 94ffbc43..104fbec2 100644
            --- a/site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html
            +++ b/site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html
            @@ -10,7 +10,7 @@
             .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem}
             
             
            -

            deeplearning/dl-op-linalg-reducemaxf-benchmark.json

            2025-06-01 11:43:19 UTC

            +

            deeplearning/dl-op-linalg-reducemaxf-benchmark.json

            2025-06-01 11:45:42 UTC

            ⚠ FAILED: JSON parse error: Expecting value
            Console output
            2025-06-01T09:44:07+00:00
            diff --git a/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html b/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html
            index e428e2ad..79606a35 100644
            --- a/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html
            +++ b/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html
            @@ -10,7 +10,7 @@
             .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem}
             
             
            -

            deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.json

            2025-06-01 11:43:19 UTC

            +

            deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.json

            2025-06-01 11:45:42 UTC

            dl-op-linalg-softmax-exp-sum-div-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_POOLING_NHWC_SUM_SCALAR0.2330.2332,993
            diff --git a/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html b/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html index dd833136..aef4afc3 100644 --- a/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html +++ b/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-matmul-transpose-b-benchmark.json

            2025-06-01 11:43:19 UTC

            +

            deeplearning/dl-op-matmul-transpose-b-benchmark.json

            2025-06-01 11:45:42 UTC

            dl-op-matmul-transpose-b-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_SOFTMAXEXPSUMDIV_SCALAR0.005590.00559123,186
            diff --git a/site/deeplearning/dl-op-tosa-transpose-benchmark.html b/site/deeplearning/dl-op-tosa-transpose-benchmark.html index db2aa6aa..930abf52 100644 --- a/site/deeplearning/dl-op-tosa-transpose-benchmark.html +++ b/site/deeplearning/dl-op-tosa-transpose-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-tosa-transpose-benchmark.json

            2025-06-01 11:43:19 UTC

            +

            deeplearning/dl-op-tosa-transpose-benchmark.json

            2025-06-01 11:45:42 UTC

            dl-op-tosa-transpose-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_MATMUL_TRANSPOSE_B/scalar_O0/iterations:51.05e+031.04e+035
            diff --git a/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index b33cd851..369bb012 100644 --- a/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:43:19 UTC

            +

            imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:45:42 UTC

            AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_TRANSPOSE_2D/scalar_O0/iterations:524.917.75
            diff --git a/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 0422f476..3321e8ae 100644 --- a/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:43:19 UTC

            +

            imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:45:42 UTC

            AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.884.88143
            diff --git a/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index e73939c8..7bd23b0a 100644 --- a/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:43:19 UTC

            +

            imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:45:42 UTC

            AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.884.88143
            diff --git a/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 30ce87b5..869aa811 100644 --- a/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:43:19 UTC

            +

            imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:45:42 UTC

            AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.511.561
            diff --git a/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 0e7d7084..cabb75c7 100644 --- a/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:43:19 UTC

            +

            imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:45:42 UTC

            AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.611.661
            diff --git a/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 3d916ff6..2f3bf819 100644 --- a/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:43:19 UTC

            +

            imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:45:42 UTC

            AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.874.87144
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index c3d7a107..408c65c2 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:43:19 UTC

            +

            imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:45:42 UTC

            AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.94.9144
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 2edc917d..48120768 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:43:19 UTC

            +

            imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:45:42 UTC

            AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.774.77147
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 8d8c9e13..3bc20444 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:43:19 UTC

            +

            imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:45:42 UTC

            AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.914.91143
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 154f7e83..dccd72fc 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:43:19 UTC

            +

            imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:45:42 UTC

            AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.611.660
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index b99bc1cc..ff976f51 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:43:19 UTC

            +

            imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:45:42 UTC

            AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.511.560
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 04deb629..04d4a6d0 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:43:19 UTC

            +

            imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:45:42 UTC

            AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/121.521.531
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 3e7c5841..68923b18 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:43:19 UTC

            +

            imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:45:42 UTC

            AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/121.621.632
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 7008d7a8..74a6364b 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:43:19 UTC

            +

            imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:45:42 UTC

            AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/134.234.221
            diff --git a/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index ec9c12ab..ca5239f5 100644 --- a/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:43:19 UTC

            +

            imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:45:42 UTC

            SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/134.334.320
            diff --git a/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index cdb0da34..1899a133 100644 --- a/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:43:19 UTC

            +

            imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:45:42 UTC

            SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.744.74148
            diff --git a/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 9fc899e7..e2af5fb9 100644 --- a/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:43:19 UTC

            +

            imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:45:42 UTC

            SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.884.88144
            diff --git a/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 7447104b..a0a7fe05 100644 --- a/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:43:19 UTC

            +

            imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:45:42 UTC

            SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.511.560
            diff --git a/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 58251912..48ccf0a7 100644 --- a/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:43:19 UTC

            +

            imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:45:42 UTC

            SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.611.661
            diff --git a/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index a789fcdd..6accf1c7 100644 --- a/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:43:19 UTC

            +

            imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:45:42 UTC

            SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/15.035.03139
            diff --git a/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index f3972ee3..474282cb 100644 --- a/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:43:19 UTC

            +

            imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:45:42 UTC

            SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.894.89143
            diff --git a/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 45dbe5be..f29f3377 100644 --- a/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:43:19 UTC

            +

            imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:45:42 UTC

            SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.894.89143
            diff --git a/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 3310768d..1201302c 100644 --- a/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:43:19 UTC

            +

            imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:45:42 UTC

            SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.874.87144
            diff --git a/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index df7c12ce..82fabc3d 100644 --- a/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:43:19 UTC

            +

            imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:45:42 UTC

            SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.511.561
            diff --git a/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index db1d0307..3e4c90b1 100644 --- a/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:43:19 UTC

            +

            imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:45:42 UTC

            SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.611.661
            diff --git a/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 4dfa3767..8939c822 100644 --- a/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:43:19 UTC

            +

            imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:45:42 UTC

            SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/121.621.631
            diff --git a/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index edbebbff..701ce2fe 100644 --- a/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:43:19 UTC

            +

            imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:45:42 UTC

            SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/121.521.532
            diff --git a/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 55654891..cad1605d 100644 --- a/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:43:19 UTC

            +

            imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:45:42 UTC

            SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/141.541.517
            diff --git a/site/vectorization/vectorization_matrix.html b/site/vectorization/vectorization_matrix.html index 5232ffdd..a7234b00 100644 --- a/site/vectorization/vectorization_matrix.html +++ b/site/vectorization/vectorization_matrix.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            vectorization/vectorization_matrix.json

            2025-06-01 11:43:19 UTC

            +

            vectorization/vectorization_matrix.json

            2025-06-01 11:45:42 UTC

            vectorization_matrix.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/134.234.220
            From 30a34e7f892297160fd1730cf8107e5951b321a4 Mon Sep 17 00:00:00 2001 From: LIUQyou <1343451020@qq.com> Date: Sun, 27 Jul 2025 15:39:30 +0000 Subject: [PATCH 29/52] test --- site/deeplearning/dl-layer-ffn-benchmark.html | 2 +- .../dl-layer-rmsnorm-benchmark.html | 2 +- .../dl-layer-selfattention-benchmark.html | 2 +- .../dl-model-lenet-benchmark.html | 2 +- .../dl-model-mobilenetv3-benchmark.html | 2 +- .../dl-model-resnet18-benchmark.html | 2 +- .../dl-model-tinyllama-benchmark.html | 2 +- .../dl-model-whisper-benchmark.html | 2 +- .../dl-op-linalg-arithaddf-benchmark.html | 2 +- .../dl-op-linalg-arithdivf-benchmark.html | 2 +- .../dl-op-linalg-arithmulf-benchmark.html | 2 +- .../dl-op-linalg-arithnegf-benchmark.html | 2 +- .../dl-op-linalg-arithsubf-benchmark.html | 2 +- .../dl-op-linalg-batch-matmul-benchmark.html | 2 +- ...-op-linalg-conv2d-nchw-fchw-benchmark.html | 2 +- ...-op-linalg-conv2d-nhwc-fhwc-benchmark.html | 2 +- ...-op-linalg-conv2d-nhwc-hwcf-benchmark.html | 2 +- ...-depthwise-conv-2d-nhwc-hwc-benchmark.html | 2 +- .../dl-op-linalg-mathexp-benchmark.html | 2 +- .../dl-op-linalg-mathfpow-benchmark.html | 2 +- .../dl-op-linalg-mathrsqrt-benchmark.html | 2 +- .../dl-op-linalg-matmul-benchmark.html | 2 +- ...-op-linalg-pooling-nhwc-sum-benchmark.html | 2 +- .../dl-op-linalg-reduceaddf-benchmark.html | 2 +- .../dl-op-linalg-reducemaxf-benchmark.html | 2 +- ...-linalg-softmax-exp-sum-div-benchmark.html | 2 +- .../dl-op-matmul-transpose-b-benchmark.html | 2 +- .../dl-op-tosa-transpose-benchmark.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- 
...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- site/vectorization/vectorization_matrix.html | 2 +- .../deeplearning/dl-layer-ffn-benchmark.json | 16 +- .../deeplearning/dl-layer-ffn-benchmark.log | 8 +- .../dl-layer-rmsnorm-benchmark.json | 16 +- .../dl-layer-rmsnorm-benchmark.log | 8 +- .../dl-layer-selfattention-benchmark.json | 14 +- .../dl-layer-selfattention-benchmark.log | 6 +- .../dl-model-lenet-benchmark.json | 16 +- .../deeplearning/dl-model-lenet-benchmark.log | 10 +- .../dl-model-mobilenetv3-benchmark.json | 16 +- .../dl-model-mobilenetv3-benchmark.log | 8 +- .../dl-model-resnet18-benchmark.json | 12 +- .../dl-model-resnet18-benchmark.log | 8 +- .../dl-model-tinyllama-benchmark.json | 16 +- .../dl-model-tinyllama-benchmark.log | 10 +- .../dl-model-whisper-benchmark.json | 12 +- .../dl-model-whisper-benchmark.log | 8 +- .../dl-op-linalg-arithaddf-benchmark.json | 16 +- .../dl-op-linalg-arithaddf-benchmark.log | 8 +- .../dl-op-linalg-arithdivf-benchmark.json | 16 
+- .../dl-op-linalg-arithdivf-benchmark.log | 8 +- .../dl-op-linalg-arithmulf-benchmark.json | 16 +- .../dl-op-linalg-arithmulf-benchmark.log | 8 +- .../dl-op-linalg-arithnegf-benchmark.json | 16 +- .../dl-op-linalg-arithnegf-benchmark.log | 8 +- .../dl-op-linalg-arithsubf-benchmark.json | 16 +- .../dl-op-linalg-arithsubf-benchmark.log | 8 +- .../dl-op-linalg-batch-matmul-benchmark.json | 32 +- .../dl-op-linalg-batch-matmul-benchmark.log | 16 +- ...-op-linalg-conv2d-nchw-fchw-benchmark.json | 14 +- ...l-op-linalg-conv2d-nchw-fchw-benchmark.log | 6 +- ...-op-linalg-conv2d-nhwc-fhwc-benchmark.json | 20 +- ...l-op-linalg-conv2d-nhwc-fhwc-benchmark.log | 10 +- ...-op-linalg-conv2d-nhwc-hwcf-benchmark.json | 16 +- ...l-op-linalg-conv2d-nhwc-hwcf-benchmark.log | 8 +- ...-depthwise-conv-2d-nhwc-hwc-benchmark.json | 16 +- ...g-depthwise-conv-2d-nhwc-hwc-benchmark.log | 10 +- .../dl-op-linalg-mathexp-benchmark.json | 16 +- .../dl-op-linalg-mathexp-benchmark.log | 8 +- .../dl-op-linalg-mathfpow-benchmark.json | 16 +- .../dl-op-linalg-mathfpow-benchmark.log | 8 +- .../dl-op-linalg-mathrsqrt-benchmark.json | 16 +- .../dl-op-linalg-mathrsqrt-benchmark.log | 8 +- .../dl-op-linalg-matmul-benchmark.json | 24 +- .../dl-op-linalg-matmul-benchmark.log | 14 +- ...-op-linalg-pooling-nhwc-sum-benchmark.json | 16 +- ...l-op-linalg-pooling-nhwc-sum-benchmark.log | 8 +- .../dl-op-linalg-reduceaddf-benchmark.json | 4 +- .../dl-op-linalg-reduceaddf-benchmark.log | 4 +- .../dl-op-linalg-reducemaxf-benchmark.json | 4 +- .../dl-op-linalg-reducemaxf-benchmark.log | 4 +- ...-linalg-softmax-exp-sum-div-benchmark.json | 16 +- ...p-linalg-softmax-exp-sum-div-benchmark.log | 8 +- .../dl-op-matmul-transpose-b-benchmark.json | 20 +- .../dl-op-matmul-transpose-b-benchmark.log | 12 +- .../dl-op-tosa-transpose-benchmark.json | 12 +- .../dl-op-tosa-transpose-benchmark.log | 8 +- test_result/geminiprocessing/build.log | 655 ++++++++++++++++++ .../geminiprocessing/cmake_configure.log | 37 + 115 files 
changed, 1088 insertions(+), 396 deletions(-) create mode 100644 test_result/geminiprocessing/build.log create mode 100644 test_result/geminiprocessing/cmake_configure.log diff --git a/site/deeplearning/dl-layer-ffn-benchmark.html b/site/deeplearning/dl-layer-ffn-benchmark.html index 99ac4904..2b2d12fe 100644 --- a/site/deeplearning/dl-layer-ffn-benchmark.html +++ b/site/deeplearning/dl-layer-ffn-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-layer-ffn-benchmark.json

            2025-06-01 11:45:42 UTC

            +

            deeplearning/dl-layer-ffn-benchmark.json

            2025-06-01 11:51:18 UTC

            dl-layer-ffn-benchmark.json

            NameTime (ns)CPU (ns)Iterations
            MLIR_MatMul/118.818.837,302,822
            diff --git a/site/deeplearning/dl-layer-rmsnorm-benchmark.html b/site/deeplearning/dl-layer-rmsnorm-benchmark.html index be34ccc6..2f1c7c9b 100644 --- a/site/deeplearning/dl-layer-rmsnorm-benchmark.html +++ b/site/deeplearning/dl-layer-rmsnorm-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-layer-rmsnorm-benchmark.json

            2025-06-01 11:45:42 UTC

            +

            deeplearning/dl-layer-rmsnorm-benchmark.json

            2025-06-01 11:51:18 UTC

            dl-layer-rmsnorm-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_LAYER_FFN/Scalar0.06530.065310,788
            diff --git a/site/deeplearning/dl-layer-selfattention-benchmark.html b/site/deeplearning/dl-layer-selfattention-benchmark.html index c0c324f9..e965d890 100644 --- a/site/deeplearning/dl-layer-selfattention-benchmark.html +++ b/site/deeplearning/dl-layer-selfattention-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-layer-selfattention-benchmark.json

            2025-06-01 11:45:42 UTC

            +

            deeplearning/dl-layer-selfattention-benchmark.json

            2025-06-01 11:51:18 UTC

            dl-layer-selfattention-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_LAYER_RMSNORM/Scalar0.001960.00196355,522
            diff --git a/site/deeplearning/dl-model-lenet-benchmark.html b/site/deeplearning/dl-model-lenet-benchmark.html index 9f971496..55e8138e 100644 --- a/site/deeplearning/dl-model-lenet-benchmark.html +++ b/site/deeplearning/dl-model-lenet-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-model-lenet-benchmark.json

            2025-06-01 11:45:42 UTC

            +

            deeplearning/dl-model-lenet-benchmark.json

            2025-06-01 11:51:18 UTC

            dl-model-lenet-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_LAYER_ATTENTION/Scalar4.694.69149
            diff --git a/site/deeplearning/dl-model-mobilenetv3-benchmark.html b/site/deeplearning/dl-model-mobilenetv3-benchmark.html index 6e5bba41..6be30cee 100644 --- a/site/deeplearning/dl-model-mobilenetv3-benchmark.html +++ b/site/deeplearning/dl-model-mobilenetv3-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-model-mobilenetv3-benchmark.json

            2025-06-01 11:45:42 UTC

            +

            deeplearning/dl-model-mobilenetv3-benchmark.json

            2025-06-01 11:51:18 UTC

            dl-model-mobilenetv3-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_MODEL_LENET/Auto_Vectorization0.1610.1614,427
            diff --git a/site/deeplearning/dl-model-resnet18-benchmark.html b/site/deeplearning/dl-model-resnet18-benchmark.html index 0923bb0d..9fb60049 100644 --- a/site/deeplearning/dl-model-resnet18-benchmark.html +++ b/site/deeplearning/dl-model-resnet18-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-model-resnet18-benchmark.json

            2025-06-01 11:45:42 UTC

            +

            deeplearning/dl-model-resnet18-benchmark.json

            2025-06-01 11:51:18 UTC

            dl-model-resnet18-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_MobileNet_V3/BM_MobileNet_V3_scalar35.935.918
            diff --git a/site/deeplearning/dl-model-tinyllama-benchmark.html b/site/deeplearning/dl-model-tinyllama-benchmark.html index 75a4f72d..6a16e291 100644 --- a/site/deeplearning/dl-model-tinyllama-benchmark.html +++ b/site/deeplearning/dl-model-tinyllama-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-model-tinyllama-benchmark.json

            2025-06-01 11:45:42 UTC

            +

            deeplearning/dl-model-tinyllama-benchmark.json

            2025-06-01 11:51:18 UTC

            dl-model-tinyllama-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_MODEL_Resnet18/Auto_Vectorization7197181
            diff --git a/site/deeplearning/dl-model-whisper-benchmark.html b/site/deeplearning/dl-model-whisper-benchmark.html index bf33823b..11fad0ea 100644 --- a/site/deeplearning/dl-model-whisper-benchmark.html +++ b/site/deeplearning/dl-model-whisper-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-model-whisper-benchmark.json

            2025-06-01 11:45:42 UTC

            +

            deeplearning/dl-model-whisper-benchmark.json

            2025-06-01 11:51:18 UTC

            dl-model-whisper-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_MODEL_TINYLLAMA/scalar1.73e+051.73e+051
            diff --git a/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html b/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html index 80ab548b..04926c66 100644 --- a/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-arithaddf-benchmark.json

            2025-06-01 11:45:42 UTC

            +

            deeplearning/dl-op-linalg-arithaddf-benchmark.json

            2025-06-01 11:51:18 UTC

            dl-op-linalg-arithaddf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_MODEL_Whisper/Auto_Vectorization7.92e+047.92e+041
            diff --git a/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html b/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html index 0aab870f..16ff5cb1 100644 --- a/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-arithdivf-benchmark.json

            2025-06-01 11:45:42 UTC

            +

            deeplearning/dl-op-linalg-arithdivf-benchmark.json

            2025-06-01 11:51:18 UTC

            dl-op-linalg-arithdivf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_ADDF_SCALAR0.02990.029923,357
            diff --git a/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html b/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html index 20d68663..a6b49a6e 100644 --- a/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-arithmulf-benchmark.json

            2025-06-01 11:45:42 UTC

            +

            deeplearning/dl-op-linalg-arithmulf-benchmark.json

            2025-06-01 11:51:18 UTC

            dl-op-linalg-arithmulf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_DIVF_SCALAR0.02930.029323,918
            diff --git a/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html b/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html index bdaee9fa..a092ba39 100644 --- a/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-arithnegf-benchmark.json

            2025-06-01 11:45:42 UTC

            +

            deeplearning/dl-op-linalg-arithnegf-benchmark.json

            2025-06-01 11:51:18 UTC

            dl-op-linalg-arithnegf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_MULF_SCALAR0.02960.029623,548
            diff --git a/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html b/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html index b66af590..02f5e54c 100644 --- a/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-arithsubf-benchmark.json

            2025-06-01 11:45:42 UTC

            +

            deeplearning/dl-op-linalg-arithsubf-benchmark.json

            2025-06-01 11:51:18 UTC

            dl-op-linalg-arithsubf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_NEGF_SCALAR0.02280.022830,522
            diff --git a/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html b/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html index 1d80d99b..99c81a5b 100644 --- a/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html +++ b/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-batch-matmul-benchmark.json

            2025-06-01 11:45:42 UTC

            +

            deeplearning/dl-op-linalg-batch-matmul-benchmark.json

            2025-06-01 11:51:18 UTC

            dl-op-linalg-batch-matmul-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_SUBF_SCALAR0.02930.029323,583
            diff --git a/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html b/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html index b2a66e34..fe6b276a 100644 --- a/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html +++ b/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.json

            2025-06-01 11:45:42 UTC

            +

            deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.json

            2025-06-01 11:51:18 UTC

            dl-op-linalg-conv2d-nchw-fchw-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_BATCH_MATMUL/Scalar/iterations:13.53e+033.53e+031
            diff --git a/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html b/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html index cdfd4051..94cfd45c 100644 --- a/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html +++ b/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json

            2025-06-01 11:45:42 UTC

            +

            deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json

            2025-06-01 11:51:18 UTC

            dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_Conv2DNchwFchw_SCALAR2832832
            diff --git a/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html b/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html index 80e1b4f4..6a7356be 100644 --- a/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json

            2025-06-01 11:45:42 UTC

            +

            deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json

            2025-06-01 11:51:18 UTC

            dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_CONV_2D_NHWC_FHWC/scalar/iterations:572.572.55
            diff --git a/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html b/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html index a2cec907..6e8dcbc6 100644 --- a/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html +++ b/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json

            2025-06-01 11:45:42 UTC

            +

            deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json

            2025-06-01 11:51:18 UTC

            dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_CONV_2D_NHWC_HWCF_SCALAR32.432.421
            diff --git a/site/deeplearning/dl-op-linalg-mathexp-benchmark.html b/site/deeplearning/dl-op-linalg-mathexp-benchmark.html index 643bbfe3..cebd6ba5 100644 --- a/site/deeplearning/dl-op-linalg-mathexp-benchmark.html +++ b/site/deeplearning/dl-op-linalg-mathexp-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-mathexp-benchmark.json

            2025-06-01 11:45:42 UTC

            +

            deeplearning/dl-op-linalg-mathexp-benchmark.json

            2025-06-01 11:51:18 UTC

            dl-op-linalg-mathexp-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/scalar/iterations:57.277.275
            diff --git a/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html b/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html index b41ad789..49bfdb44 100644 --- a/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html +++ b/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-mathfpow-benchmark.json

            2025-06-01 11:45:42 UTC

            +

            deeplearning/dl-op-linalg-mathfpow-benchmark.json

            2025-06-01 11:51:18 UTC

            dl-op-linalg-mathfpow-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_EXP_SCALAR0.04560.045615,072
            diff --git a/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html b/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html index 0295ec0e..cf9b5f25 100644 --- a/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html +++ b/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-mathrsqrt-benchmark.json

            2025-06-01 11:45:42 UTC

            +

            deeplearning/dl-op-linalg-mathrsqrt-benchmark.json

            2025-06-01 11:51:18 UTC

            dl-op-linalg-mathrsqrt-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_FPOW_SCALAR0.08520.08528,120
            diff --git a/site/deeplearning/dl-op-linalg-matmul-benchmark.html b/site/deeplearning/dl-op-linalg-matmul-benchmark.html index bbb20831..13690791 100644 --- a/site/deeplearning/dl-op-linalg-matmul-benchmark.html +++ b/site/deeplearning/dl-op-linalg-matmul-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-matmul-benchmark.json

            2025-06-01 11:45:42 UTC

            +

            deeplearning/dl-op-linalg-matmul-benchmark.json

            2025-06-01 11:51:18 UTC

            dl-op-linalg-matmul-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_RSQRT_SCALAR0.07280.07289,624
            diff --git a/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html b/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html index 4cb7430d..5f4e3e45 100644 --- a/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html +++ b/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.json

            2025-06-01 11:45:42 UTC

            +

            deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.json

            2025-06-01 11:51:18 UTC

            dl-op-linalg-pooling-nhwc-sum-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_MATMUL/scalar_O0/iterations:14.1e+034.1e+031
            diff --git a/site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html b/site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html index 640d0b48..025cb892 100644 --- a/site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-reduceaddf-benchmark.json

            2025-06-01 11:45:42 UTC

            +

            deeplearning/dl-op-linalg-reduceaddf-benchmark.json

            2025-06-01 11:51:18 UTC

            ⚠ FAILED: JSON parse error: Expecting value
            Console output
            2025-06-01T09:44:07+00:00
            diff --git a/site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html b/site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html
            index 104fbec2..b3d37937 100644
            --- a/site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html
            +++ b/site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html
            @@ -10,7 +10,7 @@
             .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem}
             
             
            -

            deeplearning/dl-op-linalg-reducemaxf-benchmark.json

            2025-06-01 11:45:42 UTC

            +

            deeplearning/dl-op-linalg-reducemaxf-benchmark.json

            2025-06-01 11:51:18 UTC

            ⚠ FAILED: JSON parse error: Expecting value
            Console output
            2025-06-01T09:44:07+00:00
            diff --git a/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html b/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html
            index 79606a35..70648fb8 100644
            --- a/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html
            +++ b/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html
            @@ -10,7 +10,7 @@
             .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem}
             
             
            -

            deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.json

            2025-06-01 11:45:42 UTC

            +

            deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.json

            2025-06-01 11:51:18 UTC

            dl-op-linalg-softmax-exp-sum-div-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_POOLING_NHWC_SUM_SCALAR0.2330.2332,993
            diff --git a/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html b/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html index aef4afc3..8878068a 100644 --- a/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html +++ b/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-matmul-transpose-b-benchmark.json

            2025-06-01 11:45:42 UTC

            +

            deeplearning/dl-op-matmul-transpose-b-benchmark.json

            2025-06-01 11:51:18 UTC

            dl-op-matmul-transpose-b-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_SOFTMAXEXPSUMDIV_SCALAR0.005590.00559123,186
            diff --git a/site/deeplearning/dl-op-tosa-transpose-benchmark.html b/site/deeplearning/dl-op-tosa-transpose-benchmark.html index 930abf52..e1189cd9 100644 --- a/site/deeplearning/dl-op-tosa-transpose-benchmark.html +++ b/site/deeplearning/dl-op-tosa-transpose-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-tosa-transpose-benchmark.json

            2025-06-01 11:45:42 UTC

            +

            deeplearning/dl-op-tosa-transpose-benchmark.json

            2025-06-01 11:51:18 UTC

            dl-op-tosa-transpose-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_MATMUL_TRANSPOSE_B/scalar_O0/iterations:51.05e+031.04e+035
            diff --git a/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 369bb012..a7170e4e 100644 --- a/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:45:42 UTC

            +

            imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:51:18 UTC

            AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_TRANSPOSE_2D/scalar_O0/iterations:524.917.75
            diff --git a/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 3321e8ae..c3fe25af 100644 --- a/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:45:42 UTC

            +

            imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:51:18 UTC

            AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.884.88143
            diff --git a/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 7bd23b0a..78583280 100644 --- a/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:45:42 UTC

            +

            imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:51:18 UTC

            AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.884.88143
            diff --git a/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 869aa811..8de2abfa 100644 --- a/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:45:42 UTC

            +

            imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:51:18 UTC

            AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.511.561
            diff --git a/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index cabb75c7..e63c263e 100644 --- a/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:45:42 UTC

            +

            imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:51:18 UTC

            AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.611.661
            diff --git a/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 2f3bf819..0578ab54 100644 --- a/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:45:42 UTC

            +

            imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:51:18 UTC

            AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.874.87144
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 408c65c2..fcecaf28 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:45:42 UTC

            +

            imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:51:18 UTC

            AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.94.9144
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 48120768..5863c25f 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:45:42 UTC

            +

            imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:51:18 UTC

            AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.774.77147
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 3bc20444..184cabd9 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:45:42 UTC

            +

            imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:51:18 UTC

            AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.914.91143
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index dccd72fc..05b9941f 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:45:42 UTC

            +

            imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:51:18 UTC

            AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.611.660
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index ff976f51..b5c86a18 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:45:42 UTC

            +

            imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:51:18 UTC

            AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.511.560
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 04d4a6d0..5b96ad7d 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:45:42 UTC

            +

            imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:51:18 UTC

            AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/121.521.531
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 68923b18..6091af1b 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:45:42 UTC

            +

            imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:51:18 UTC

            AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/121.621.632
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 74a6364b..822c611d 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:45:42 UTC

            +

            imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:51:18 UTC

            AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/134.234.221
            diff --git a/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index ca5239f5..51800cd5 100644 --- a/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:45:42 UTC

            +

            imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:51:18 UTC

            SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/134.334.320
            diff --git a/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 1899a133..06b583b9 100644 --- a/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:45:42 UTC

            +

            imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:51:18 UTC

            SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.744.74148
            diff --git a/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index e2af5fb9..61778e2a 100644 --- a/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:45:42 UTC

            +

            imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:51:18 UTC

            SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.884.88144
            diff --git a/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index a0a7fe05..6704c12e 100644 --- a/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:45:42 UTC

            +

            imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:51:18 UTC

            SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.511.560
            diff --git a/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 48ccf0a7..7a7ba37b 100644 --- a/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:45:42 UTC

            +

            imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:51:18 UTC

            SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.611.661
            diff --git a/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 6accf1c7..1cf7bd35 100644 --- a/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:45:42 UTC

            +

            imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:51:18 UTC

            SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/15.035.03139
            diff --git a/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 474282cb..76402b2a 100644 --- a/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:45:42 UTC

            +

            imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:51:18 UTC

            SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.894.89143
            diff --git a/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index f29f3377..1547a6d4 100644 --- a/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:45:42 UTC

            +

            imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:51:18 UTC

            SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.894.89143
            diff --git a/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 1201302c..4b980272 100644 --- a/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:45:42 UTC

            +

            imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:51:18 UTC

            SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.874.87144
            diff --git a/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 82fabc3d..ded7d114 100644 --- a/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:45:42 UTC

            +

            imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:51:18 UTC

            SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.511.561
            diff --git a/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 3e4c90b1..b1a3173e 100644 --- a/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:45:42 UTC

            +

            imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:51:18 UTC

            SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.611.661
            diff --git a/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 8939c822..8ece84be 100644 --- a/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:45:42 UTC

            +

            imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:51:18 UTC

            SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/121.621.631
            diff --git a/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 701ce2fe..2e6263cd 100644 --- a/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:45:42 UTC

            +

            imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:51:18 UTC

            SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/121.521.532
            diff --git a/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index cad1605d..cb9f1665 100644 --- a/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:45:42 UTC

            +

            imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:51:18 UTC

            SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/141.541.517
            diff --git a/site/vectorization/vectorization_matrix.html b/site/vectorization/vectorization_matrix.html index a7234b00..92c881d4 100644 --- a/site/vectorization/vectorization_matrix.html +++ b/site/vectorization/vectorization_matrix.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            vectorization/vectorization_matrix.json

            2025-06-01 11:45:42 UTC

            +

            vectorization/vectorization_matrix.json

            2025-06-01 11:51:18 UTC

            vectorization_matrix.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/134.234.220
            diff --git a/test_result/deeplearning/dl-layer-ffn-benchmark.json b/test_result/deeplearning/dl-layer-ffn-benchmark.json index f2efeaac..2aa81f81 100644 --- a/test_result/deeplearning/dl-layer-ffn-benchmark.json +++ b/test_result/deeplearning/dl-layer-ffn-benchmark.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-06-01T09:43:17+00:00", + "date": "2025-07-27T14:26:49+00:00", "host_name": "4ed4bacfe45d", "executable": "./dl-layer-ffn-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.02002,1.92676,3.979], + "load_avg": [1.03564,1.19092,1.30615], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 10788, - "real_time": 6.5253924752468523e-02, - "cpu_time": 6.5252047089358556e-02, + "iterations": 10762, + "real_time": 6.5369959010264392e-02, + "cpu_time": 6.5368221055565889e-02, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 25830, - "real_time": 2.6970787858644568e-02, - "cpu_time": 2.6970320867208675e-02, + "iterations": 25673, + "real_time": 2.7106251964175147e-02, + "cpu_time": 2.7104521248003739e-02, "time_unit": "ms" } ] diff --git a/test_result/deeplearning/dl-layer-ffn-benchmark.log b/test_result/deeplearning/dl-layer-ffn-benchmark.log index 4a30e9a6..8a384bc4 100644 --- a/test_result/deeplearning/dl-layer-ffn-benchmark.log +++ b/test_result/deeplearning/dl-layer-ffn-benchmark.log @@ -1,4 +1,4 @@ -2025-06-01T09:43:17+00:00 +2025-07-27T14:26:49+00:00 Running ./dl-layer-ffn-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.02, 1.93, 3.98 +Load Average: 1.04, 1.19, 1.31 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
-------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------- -DL_LAYER_FFN/Scalar 0.065 ms 0.065 ms 10788 -DL_LAYER_FFN/Auto_Vectorization 0.027 ms 0.027 ms 25830 +DL_LAYER_FFN/Scalar 0.065 ms 0.065 ms 10762 +DL_LAYER_FFN/Auto_Vectorization 0.027 ms 0.027 ms 25673 ----------------------------------------------------------- Correctness Verification: PASS ----------------------------------------------------------- diff --git a/test_result/deeplearning/dl-layer-rmsnorm-benchmark.json b/test_result/deeplearning/dl-layer-rmsnorm-benchmark.json index d91335de..e1b8cabe 100644 --- a/test_result/deeplearning/dl-layer-rmsnorm-benchmark.json +++ b/test_result/deeplearning/dl-layer-rmsnorm-benchmark.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-06-01T09:43:21+00:00", + "date": "2025-07-27T14:26:53+00:00", "host_name": "4ed4bacfe45d", "executable": "./dl-layer-rmsnorm-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.02002,1.92676,3.979], + "load_avg": [1.03271,1.1875,1.3042], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 355522, - "real_time": 1.9578834433703969e-03, - "cpu_time": 1.9577640146038783e-03, + "iterations": 356202, + "real_time": 1.9603331904748766e-03, + "cpu_time": 1.9603087601978656e-03, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 763038, - "real_time": 9.0721212688664129e-04, - "cpu_time": 9.0718018892899164e-04, + "iterations": 751546, + "real_time": 9.1466103771357563e-04, + "cpu_time": 9.1459137830551969e-04, "time_unit": "ms" } ] diff --git a/test_result/deeplearning/dl-layer-rmsnorm-benchmark.log b/test_result/deeplearning/dl-layer-rmsnorm-benchmark.log index 66a6a4da..e0272f58 100644 --- 
a/test_result/deeplearning/dl-layer-rmsnorm-benchmark.log +++ b/test_result/deeplearning/dl-layer-rmsnorm-benchmark.log @@ -1,4 +1,4 @@ -2025-06-01T09:43:21+00:00 +2025-07-27T14:26:53+00:00 Running ./dl-layer-rmsnorm-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.02, 1.93, 3.98 +Load Average: 1.03, 1.19, 1.30 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. ------------------------------------------------------------------------------ Benchmark Time CPU Iterations ------------------------------------------------------------------------------ -DL_LAYER_RMSNORM/Scalar 0.002 ms 0.002 ms 355522 -DL_LAYER_RMSNORM/Auto_Vectorization 0.001 ms 0.001 ms 763038 +DL_LAYER_RMSNORM/Scalar 0.002 ms 0.002 ms 356202 +DL_LAYER_RMSNORM/Auto_Vectorization 0.001 ms 0.001 ms 751546 ----------------------------------------------------------- Correctness Verification: PASS ----------------------------------------------------------- diff --git a/test_result/deeplearning/dl-layer-selfattention-benchmark.json b/test_result/deeplearning/dl-layer-selfattention-benchmark.json index f2451cc0..ad75a13e 100644 --- a/test_result/deeplearning/dl-layer-selfattention-benchmark.json +++ b/test_result/deeplearning/dl-layer-selfattention-benchmark.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-06-01T09:43:19+00:00", + "date": "2025-07-27T14:26:51+00:00", "host_name": "4ed4bacfe45d", "executable": "./dl-layer-selfattention-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.02002,1.92676,3.979], + "load_avg": [1.03564,1.19092,1.30615], "library_build_type": "release" }, "benchmarks": [ @@ -46,8 +46,8 @@ "repetition_index": 0, "threads": 1, "iterations": 149, - "real_time": 4.6853602007531476e+00, - "cpu_time": 4.6852666510067111e+00, + "real_time": 
4.6930055590283954e+00, + "cpu_time": 4.6929284496644295e+00, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 447, - "real_time": 1.5682149470119136e+00, - "cpu_time": 1.5680948903803129e+00, + "iterations": 446, + "real_time": 1.5730429983660246e+00, + "cpu_time": 1.5730149080717490e+00, "time_unit": "ms" } ] diff --git a/test_result/deeplearning/dl-layer-selfattention-benchmark.log b/test_result/deeplearning/dl-layer-selfattention-benchmark.log index f4e9fd5c..05dd5003 100644 --- a/test_result/deeplearning/dl-layer-selfattention-benchmark.log +++ b/test_result/deeplearning/dl-layer-selfattention-benchmark.log @@ -1,4 +1,4 @@ -2025-06-01T09:43:19+00:00 +2025-07-27T14:26:51+00:00 Running ./dl-layer-selfattention-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.02, 1.93, 3.98 +Load Average: 1.04, 1.19, 1.31 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
-------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------- DL_LAYER_ATTENTION/Scalar 4.69 ms 4.69 ms 149 -DL_LAYER_ATTENTION/Auto_Vectorization 1.57 ms 1.57 ms 447 +DL_LAYER_ATTENTION/Auto_Vectorization 1.57 ms 1.57 ms 446 ----------------------------------------------------------- Correctness Verification: PASS ----------------------------------------------------------- diff --git a/test_result/deeplearning/dl-model-lenet-benchmark.json b/test_result/deeplearning/dl-model-lenet-benchmark.json index 12256cab..5b50e363 100644 --- a/test_result/deeplearning/dl-model-lenet-benchmark.json +++ b/test_result/deeplearning/dl-model-lenet-benchmark.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-06-01T09:39:21+00:00", + "date": "2025-07-27T14:22:52+00:00", "host_name": "4ed4bacfe45d", "executable": "./dl-model-lenet-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [2.00293,3.05371,4.85986], + "load_avg": [1.40137,1.39453,1.396], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4427, - "real_time": 1.6056818065873274e-01, - "cpu_time": 1.6056101242376328e-01, + "iterations": 4304, + "real_time": 1.6466251636775675e-01, + "cpu_time": 1.6464831319702602e-01, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5106, - "real_time": 1.3631909335658823e-01, - "cpu_time": 1.3631605327066199e-01, + "iterations": 5022, + "real_time": 1.3717319905366238e-01, + "cpu_time": 1.3716751294305060e-01, "time_unit": "ms" } ] diff --git a/test_result/deeplearning/dl-model-lenet-benchmark.log b/test_result/deeplearning/dl-model-lenet-benchmark.log index dd5e81a1..0debea96 100644 --- a/test_result/deeplearning/dl-model-lenet-benchmark.log +++ 
b/test_result/deeplearning/dl-model-lenet-benchmark.log @@ -1,4 +1,4 @@ -2025-06-01T09:39:21+00:00 +2025-07-27T14:22:52+00:00 Running ./dl-model-lenet-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,14 +6,14 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 2.00, 3.05, 4.86 +Load Average: 1.40, 1.39, 1.40 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. ----------------------------------------------------------------------------- Benchmark Time CPU Iterations ----------------------------------------------------------------------------- -DL_MODEL_LENET/Auto_Vectorization 0.161 ms 0.161 ms 4427 -DL_MODEL_LENET/Buddy_Vectorization 0.136 ms 0.136 ms 5106 +DL_MODEL_LENET/Auto_Vectorization 0.165 ms 0.165 ms 4304 +DL_MODEL_LENET/Buddy_Vectorization 0.137 ms 0.137 ms 5022 ----------------------------------------------------------- Correctness Verification: -Transform case: PASS +Transform case: FAIL ----------------------------------------------------------- diff --git a/test_result/deeplearning/dl-model-mobilenetv3-benchmark.json b/test_result/deeplearning/dl-model-mobilenetv3-benchmark.json index e255bf23..7aa54d99 100644 --- a/test_result/deeplearning/dl-model-mobilenetv3-benchmark.json +++ b/test_result/deeplearning/dl-model-mobilenetv3-benchmark.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-06-01T09:39:18+00:00", + "date": "2025-07-27T14:22:49+00:00", "host_name": "4ed4bacfe45d", "executable": "./dl-model-mobilenetv3-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [2.00293,3.05371,4.85986], + "load_avg": [1.40137,1.39453,1.396], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 18, - "real_time": 3.5876018926501274e+01, - "cpu_time": 3.5873441388888892e+01, + "iterations": 19, + "real_time": 
3.7132115740525094e+01, + "cpu_time": 3.7130740473684206e+01, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 22, - "real_time": 3.2652140764350243e+01, - "cpu_time": 3.2651343363636357e+01, + "iterations": 21, + "real_time": 3.2978398547995660e+01, + "cpu_time": 3.2976469809523813e+01, "time_unit": "ms" } ] diff --git a/test_result/deeplearning/dl-model-mobilenetv3-benchmark.log b/test_result/deeplearning/dl-model-mobilenetv3-benchmark.log index f3872e7a..1a74d4d6 100644 --- a/test_result/deeplearning/dl-model-mobilenetv3-benchmark.log +++ b/test_result/deeplearning/dl-model-mobilenetv3-benchmark.log @@ -1,4 +1,4 @@ -2025-06-01T09:39:18+00:00 +2025-07-27T14:22:49+00:00 Running ./dl-model-mobilenetv3-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 2.00, 3.05, 4.86 +Load Average: 1.40, 1.39, 1.40 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
----------------------------------------------------------------------------------- Benchmark Time CPU Iterations ----------------------------------------------------------------------------------- -BM_MobileNet_V3/BM_MobileNet_V3_scalar 35.9 ms 35.9 ms 18 -BM_MobileNet_V3/BM_MobileNet_V3_conv_opt 32.7 ms 32.7 ms 22 +BM_MobileNet_V3/BM_MobileNet_V3_scalar 37.1 ms 37.1 ms 19 +BM_MobileNet_V3/BM_MobileNet_V3_conv_opt 33.0 ms 33.0 ms 21 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/test_result/deeplearning/dl-model-resnet18-benchmark.json b/test_result/deeplearning/dl-model-resnet18-benchmark.json index 6b9417af..95a6eefe 100644 --- a/test_result/deeplearning/dl-model-resnet18-benchmark.json +++ b/test_result/deeplearning/dl-model-resnet18-benchmark.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-06-01T09:43:14+00:00", + "date": "2025-07-27T14:26:46+00:00", "host_name": "4ed4bacfe45d", "executable": "./dl-model-resnet18-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.02197,1.94287,3.99512], + "load_avg": [1.03906,1.19434,1.30811], "library_build_type": "release" }, "benchmarks": [ @@ -46,8 +46,8 @@ "repetition_index": 0, "threads": 1, "iterations": 1, - "real_time": 7.1908619441092014e+02, - "cpu_time": 7.1777358900000013e+02, + "real_time": 7.3107384704053402e+02, + "cpu_time": 7.2304104800000016e+02, "time_unit": "ms" }, { @@ -60,8 +60,8 @@ "repetition_index": 0, "threads": 1, "iterations": 1, - "real_time": 7.2598466090857983e+02, - "cpu_time": 7.1803150299999993e+02, + "real_time": 7.2872185707092285e+02, + "cpu_time": 7.2154317500000013e+02, "time_unit": "ms" } ] diff --git a/test_result/deeplearning/dl-model-resnet18-benchmark.log b/test_result/deeplearning/dl-model-resnet18-benchmark.log index 2159c701..d445e776 100644 --- a/test_result/deeplearning/dl-model-resnet18-benchmark.log +++ b/test_result/deeplearning/dl-model-resnet18-benchmark.log @@ 
-1,4 +1,4 @@ -2025-06-01T09:43:14+00:00 +2025-07-27T14:26:46+00:00 Running ./dl-model-resnet18-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.02, 1.94, 4.00 +Load Average: 1.04, 1.19, 1.31 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. -------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------- -DL_MODEL_Resnet18/Auto_Vectorization 719 ms 718 ms 1 -DL_MODEL_Resnet18/Buddy_Vectorization 726 ms 718 ms 1 +DL_MODEL_Resnet18/Auto_Vectorization 731 ms 723 ms 1 +DL_MODEL_Resnet18/Buddy_Vectorization 729 ms 722 ms 1 ----------------------------------------------------------- Correctness Verification: PASS ----------------------------------------------------------- diff --git a/test_result/deeplearning/dl-model-tinyllama-benchmark.json b/test_result/deeplearning/dl-model-tinyllama-benchmark.json index c746579b..cd8070f6 100644 --- a/test_result/deeplearning/dl-model-tinyllama-benchmark.json +++ b/test_result/deeplearning/dl-model-tinyllama-benchmark.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-06-01T09:33:00+00:00", + "date": "2025-07-27T14:17:33+00:00", "host_name": "4ed4bacfe45d", "executable": "./dl-model-tinyllama-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [2.75537,5.10107,6.17822], + "load_avg": [1.70264,1.92041,1.53662], "library_build_type": "release" }, "benchmarks": [ @@ -46,8 +46,8 @@ "repetition_index": 0, "threads": 1, "iterations": 1, - "real_time": 1.7263756665587425e+05, - "cpu_time": 1.7263429748099999e+05, + "real_time": 1.3918454140797257e+05, + "cpu_time": 1.3917853827299998e+05, "time_unit": "ms" }, { @@ -60,8 +60,8 @@ "repetition_index": 0, "threads": 1, "iterations": 1, - 
"real_time": 1.0490998193621635e+04, - "cpu_time": 1.0490786658000019e+04, + "real_time": 1.0038352340459824e+04, + "cpu_time": 1.0037513700999994e+04, "time_unit": "ms" }, { @@ -74,8 +74,8 @@ "repetition_index": 0, "threads": 1, "iterations": 1, - "real_time": 8.2188854273408651e+03, - "cpu_time": 7.6072349719999866e+03, + "real_time": 7.8359359223395586e+03, + "cpu_time": 7.2006253560000177e+03, "time_unit": "ms" } ] diff --git a/test_result/deeplearning/dl-model-tinyllama-benchmark.log b/test_result/deeplearning/dl-model-tinyllama-benchmark.log index f5e906ed..7ac00b10 100644 --- a/test_result/deeplearning/dl-model-tinyllama-benchmark.log +++ b/test_result/deeplearning/dl-model-tinyllama-benchmark.log @@ -1,4 +1,4 @@ -2025-06-01T09:33:00+00:00 +2025-07-27T14:17:33+00:00 Running ./dl-model-tinyllama-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,14 +6,14 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 2.76, 5.10, 6.18 +Load Average: 1.70, 1.92, 1.54 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
---------------------------------------------------------------------------- Benchmark Time CPU Iterations ---------------------------------------------------------------------------- -DL_MODEL_TINYLLAMA/scalar 172638 ms 172634 ms 1 -DL_MODEL_TINYLLAMA/matmul_opt 10491 ms 10491 ms 1 -DL_MODEL_TINYLLAMA/matmul_opt_omp 8219 ms 7607 ms 1 +DL_MODEL_TINYLLAMA/scalar 139185 ms 139179 ms 1 +DL_MODEL_TINYLLAMA/matmul_opt 10038 ms 10038 ms 1 +DL_MODEL_TINYLLAMA/matmul_opt_omp 7836 ms 7201 ms 1 ---------- Verification ---------- matmul_opt PASS matmul_opt_omp PASS diff --git a/test_result/deeplearning/dl-model-whisper-benchmark.json b/test_result/deeplearning/dl-model-whisper-benchmark.json index 51a20002..97d932d5 100644 --- a/test_result/deeplearning/dl-model-whisper-benchmark.json +++ b/test_result/deeplearning/dl-model-whisper-benchmark.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-06-01T09:39:22+00:00", + "date": "2025-07-27T14:22:54+00:00", "host_name": "4ed4bacfe45d", "executable": "./dl-model-whisper-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [2.00244,3.03613,4.84424], + "load_avg": [1.44971,1.40479,1.39941], "library_build_type": "release" }, "benchmarks": [ @@ -46,8 +46,8 @@ "repetition_index": 0, "threads": 1, "iterations": 1, - "real_time": 7.9215591410174966e+04, - "cpu_time": 7.9212764204999999e+04, + "real_time": 7.9983285805210471e+04, + "cpu_time": 7.9980347596000007e+04, "time_unit": "ms" }, { @@ -60,8 +60,8 @@ "repetition_index": 0, "threads": 1, "iterations": 1, - "real_time": 3.6910132765769958e+04, - "cpu_time": 3.6904499842000005e+04, + "real_time": 3.6713125728070736e+04, + "cpu_time": 3.6699949372999996e+04, "time_unit": "ms" } ] diff --git a/test_result/deeplearning/dl-model-whisper-benchmark.log b/test_result/deeplearning/dl-model-whisper-benchmark.log index 2f896336..b34ddef1 100644 --- a/test_result/deeplearning/dl-model-whisper-benchmark.log +++ 
b/test_result/deeplearning/dl-model-whisper-benchmark.log @@ -1,4 +1,4 @@ -2025-06-01T09:39:22+00:00 +2025-07-27T14:22:54+00:00 Running ./dl-model-whisper-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 2.00, 3.04, 4.84 +Load Average: 1.45, 1.40, 1.40 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. ------------------------------------------------------------------------------- Benchmark Time CPU Iterations ------------------------------------------------------------------------------- -DL_MODEL_Whisper/Auto_Vectorization 79216 ms 79213 ms 1 -DL_MODEL_Whisper/Buddy_Vectorization 36910 ms 36904 ms 1 +DL_MODEL_Whisper/Auto_Vectorization 79983 ms 79980 ms 1 +DL_MODEL_Whisper/Buddy_Vectorization 36713 ms 36700 ms 1 ----------------------------------------------------------- Correctness Verification for Output1: PASS Correctness Verification for Output2: FAIL diff --git a/test_result/deeplearning/dl-op-linalg-arithaddf-benchmark.json b/test_result/deeplearning/dl-op-linalg-arithaddf-benchmark.json index 8c844c94..56be6d39 100644 --- a/test_result/deeplearning/dl-op-linalg-arithaddf-benchmark.json +++ b/test_result/deeplearning/dl-op-linalg-arithaddf-benchmark.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-06-01T09:43:50+00:00", + "date": "2025-07-27T14:27:23+00:00", "host_name": "4ed4bacfe45d", "executable": "./dl-op-linalg-arithaddf-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.01074,1.8374,3.88281], + "load_avg": [1.07178,1.18408,1.29834], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 23357, - "real_time": 2.9862665965978977e-02, - "cpu_time": 2.9862255341011262e-02, + "iterations": 23451, + "real_time": 2.9521707521156536e-02, + 
"cpu_time": 2.9520921282674511e-02, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 164695, - "real_time": 3.9880004303424737e-03, - "cpu_time": 3.9879427426454967e-03, + "iterations": 174931, + "real_time": 4.0048593536238502e-03, + "cpu_time": 4.0046560300918644e-03, "time_unit": "ms" } ] diff --git a/test_result/deeplearning/dl-op-linalg-arithaddf-benchmark.log b/test_result/deeplearning/dl-op-linalg-arithaddf-benchmark.log index bc68b6b2..55be5634 100644 --- a/test_result/deeplearning/dl-op-linalg-arithaddf-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-arithaddf-benchmark.log @@ -1,4 +1,4 @@ -2025-06-01T09:43:50+00:00 +2025-07-27T14:27:23+00:00 Running ./dl-op-linalg-arithaddf-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.01, 1.84, 3.88 +Load Average: 1.07, 1.18, 1.30 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
-------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------- -BM_ADDF_SCALAR 0.030 ms 0.030 ms 23357 -BM_ADDF_AutoVectorization 0.004 ms 0.004 ms 164695 +BM_ADDF_SCALAR 0.030 ms 0.030 ms 23451 +BM_ADDF_AutoVectorization 0.004 ms 0.004 ms 174931 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/test_result/deeplearning/dl-op-linalg-arithdivf-benchmark.json b/test_result/deeplearning/dl-op-linalg-arithdivf-benchmark.json index 72f7533b..b0bf62a8 100644 --- a/test_result/deeplearning/dl-op-linalg-arithdivf-benchmark.json +++ b/test_result/deeplearning/dl-op-linalg-arithdivf-benchmark.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-06-01T09:43:53+00:00", + "date": "2025-07-27T14:27:25+00:00", "host_name": "4ed4bacfe45d", "executable": "./dl-op-linalg-arithdivf-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.00977,1.82324,3.86719], + "load_avg": [1.07178,1.18408,1.29834], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 23918, - "real_time": 2.9331494164173792e-02, - "cpu_time": 2.9330557111798643e-02, + "iterations": 23358, + "real_time": 2.9819811858515573e-02, + "cpu_time": 2.9819110540285985e-02, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 73794, - "real_time": 1.0297565909804414e-02, - "cpu_time": 1.0297454332330543e-02, + "iterations": 67517, + "real_time": 9.4892800457678818e-03, + "cpu_time": 9.4890076869529171e-03, "time_unit": "ms" } ] diff --git a/test_result/deeplearning/dl-op-linalg-arithdivf-benchmark.log b/test_result/deeplearning/dl-op-linalg-arithdivf-benchmark.log index 87c0b507..14368fa8 100644 --- a/test_result/deeplearning/dl-op-linalg-arithdivf-benchmark.log +++ 
b/test_result/deeplearning/dl-op-linalg-arithdivf-benchmark.log @@ -1,4 +1,4 @@ -2025-06-01T09:43:53+00:00 +2025-07-27T14:27:25+00:00 Running ./dl-op-linalg-arithdivf-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.01, 1.82, 3.87 +Load Average: 1.07, 1.18, 1.30 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. -------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------- -BM_DIVF_SCALAR 0.029 ms 0.029 ms 23918 -BM_DIVF_AutoVectorization 0.010 ms 0.010 ms 73794 +BM_DIVF_SCALAR 0.030 ms 0.030 ms 23358 +BM_DIVF_AutoVectorization 0.009 ms 0.009 ms 67517 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/test_result/deeplearning/dl-op-linalg-arithmulf-benchmark.json b/test_result/deeplearning/dl-op-linalg-arithmulf-benchmark.json index 28b9d220..323f84ef 100644 --- a/test_result/deeplearning/dl-op-linalg-arithmulf-benchmark.json +++ b/test_result/deeplearning/dl-op-linalg-arithmulf-benchmark.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-06-01T09:43:54+00:00", + "date": "2025-07-27T14:27:27+00:00", "host_name": "4ed4bacfe45d", "executable": "./dl-op-linalg-arithmulf-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.00977,1.82324,3.86719], + "load_avg": [1.07178,1.18408,1.29834], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 23548, - "real_time": 2.9561160227525248e-02, - "cpu_time": 2.9560279938848310e-02, + "iterations": 23441, + "real_time": 2.9818539820246910e-02, + "cpu_time": 2.9818065739516234e-02, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, 
"repetition_index": 0, "threads": 1, - "iterations": 146698, - "real_time": 3.9905801919875427e-03, - "cpu_time": 3.9904828423018707e-03, + "iterations": 175263, + "real_time": 3.9962025305209769e-03, + "cpu_time": 3.9961001922824537e-03, "time_unit": "ms" } ] diff --git a/test_result/deeplearning/dl-op-linalg-arithmulf-benchmark.log b/test_result/deeplearning/dl-op-linalg-arithmulf-benchmark.log index 16998437..54426146 100644 --- a/test_result/deeplearning/dl-op-linalg-arithmulf-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-arithmulf-benchmark.log @@ -1,4 +1,4 @@ -2025-06-01T09:43:54+00:00 +2025-07-27T14:27:27+00:00 Running ./dl-op-linalg-arithmulf-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.01, 1.82, 3.87 +Load Average: 1.07, 1.18, 1.30 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
-------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------- -BM_MULF_SCALAR 0.030 ms 0.030 ms 23548 -BM_MULF_AutoVectorization 0.004 ms 0.004 ms 146698 +BM_MULF_SCALAR 0.030 ms 0.030 ms 23441 +BM_MULF_AutoVectorization 0.004 ms 0.004 ms 175263 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/test_result/deeplearning/dl-op-linalg-arithnegf-benchmark.json b/test_result/deeplearning/dl-op-linalg-arithnegf-benchmark.json index ce4296db..dbc3bdf1 100644 --- a/test_result/deeplearning/dl-op-linalg-arithnegf-benchmark.json +++ b/test_result/deeplearning/dl-op-linalg-arithnegf-benchmark.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-06-01T09:43:57+00:00", + "date": "2025-07-27T14:27:29+00:00", "host_name": "4ed4bacfe45d", "executable": "./dl-op-linalg-arithnegf-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.00879,1.80957,3.85156], + "load_avg": [1.06592,1.18066,1.29639], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 30522, - "real_time": 2.2840192988802945e-02, - "cpu_time": 2.2839767708538104e-02, + "iterations": 30969, + "real_time": 2.2511821252336077e-02, + "cpu_time": 2.2511652910975493e-02, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 279150, - "real_time": 2.4855299081273687e-03, - "cpu_time": 2.4854867562242519e-03, + "iterations": 277205, + "real_time": 2.4580652904906994e-03, + "cpu_time": 2.4580170235024620e-03, "time_unit": "ms" } ] diff --git a/test_result/deeplearning/dl-op-linalg-arithnegf-benchmark.log b/test_result/deeplearning/dl-op-linalg-arithnegf-benchmark.log index 069ba6d3..ba3ca38e 100644 --- a/test_result/deeplearning/dl-op-linalg-arithnegf-benchmark.log +++ 
b/test_result/deeplearning/dl-op-linalg-arithnegf-benchmark.log @@ -1,4 +1,4 @@ -2025-06-01T09:43:57+00:00 +2025-07-27T14:27:29+00:00 Running ./dl-op-linalg-arithnegf-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.01, 1.81, 3.85 +Load Average: 1.07, 1.18, 1.30 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. -------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------- -BM_NEGF_SCALAR 0.023 ms 0.023 ms 30522 -BM_NEGF_AutoVectorization 0.002 ms 0.002 ms 279150 +BM_NEGF_SCALAR 0.023 ms 0.023 ms 30969 +BM_NEGF_AutoVectorization 0.002 ms 0.002 ms 277205 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/test_result/deeplearning/dl-op-linalg-arithsubf-benchmark.json b/test_result/deeplearning/dl-op-linalg-arithsubf-benchmark.json index 38f62ff3..f6eb7c59 100644 --- a/test_result/deeplearning/dl-op-linalg-arithsubf-benchmark.json +++ b/test_result/deeplearning/dl-op-linalg-arithsubf-benchmark.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-06-01T09:43:59+00:00", + "date": "2025-07-27T14:27:31+00:00", "host_name": "4ed4bacfe45d", "executable": "./dl-op-linalg-arithsubf-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.00879,1.80957,3.85156], + "load_avg": [1.06592,1.18066,1.29639], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 23583, - "real_time": 2.9263793471199923e-02, - "cpu_time": 2.9262871305601495e-02, + "iterations": 23509, + "real_time": 2.9357210888091442e-02, + "cpu_time": 2.9356123654770513e-02, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, 
"repetition_index": 0, "threads": 1, - "iterations": 175569, - "real_time": 3.9921569896972341e-03, - "cpu_time": 3.9921202718019690e-03, + "iterations": 175223, + "real_time": 3.9904742644157176e-03, + "cpu_time": 3.9903206884940911e-03, "time_unit": "ms" } ] diff --git a/test_result/deeplearning/dl-op-linalg-arithsubf-benchmark.log b/test_result/deeplearning/dl-op-linalg-arithsubf-benchmark.log index 4c1f090c..0e7bfdce 100644 --- a/test_result/deeplearning/dl-op-linalg-arithsubf-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-arithsubf-benchmark.log @@ -1,4 +1,4 @@ -2025-06-01T09:43:59+00:00 +2025-07-27T14:27:31+00:00 Running ./dl-op-linalg-arithsubf-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.01, 1.81, 3.85 +Load Average: 1.07, 1.18, 1.30 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
-------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------- -BM_SUBF_SCALAR 0.029 ms 0.029 ms 23583 -BM_SUBF_AutoVectorization 0.004 ms 0.004 ms 175569 +BM_SUBF_SCALAR 0.029 ms 0.029 ms 23509 +BM_SUBF_AutoVectorization 0.004 ms 0.004 ms 175223 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/test_result/deeplearning/dl-op-linalg-batch-matmul-benchmark.json b/test_result/deeplearning/dl-op-linalg-batch-matmul-benchmark.json index 3cc18461..b404dcfc 100644 --- a/test_result/deeplearning/dl-op-linalg-batch-matmul-benchmark.json +++ b/test_result/deeplearning/dl-op-linalg-batch-matmul-benchmark.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-06-01T09:43:41+00:00", + "date": "2025-07-27T14:27:13+00:00", "host_name": "4ed4bacfe45d", "executable": "./dl-op-linalg-batch-matmul-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.01367,1.86621,3.91455], + "load_avg": [1.08496,1.19092,1.30225], "library_build_type": "release" }, "benchmarks": [ @@ -46,8 +46,8 @@ "repetition_index": 0, "threads": 1, "iterations": 1, - "real_time": 3.5291669797152281e+03, - "cpu_time": 3.5290844250000000e+03, + "real_time": 3.5355993732810020e+03, + "cpu_time": 3.5355283939999999e+03, "time_unit": "ms" }, { @@ -60,8 +60,8 @@ "repetition_index": 0, "threads": 1, "iterations": 1, - "real_time": 9.7394227981567383e+02, - "cpu_time": 9.7389873400000045e+02, + "real_time": 9.7580210678279400e+02, + "cpu_time": 9.7576049499999988e+02, "time_unit": "ms" }, { @@ -74,8 +74,8 @@ "repetition_index": 0, "threads": 1, "iterations": 1, - "real_time": 1.9066828303039074e+02, - "cpu_time": 1.9066431600000033e+02, + "real_time": 1.9509307853877544e+02, + "cpu_time": 1.9508785299999954e+02, "time_unit": "ms" }, { @@ -88,8 +88,8 @@ "repetition_index": 0, "threads": 1, "iterations": 1, - "real_time": 
1.0904773883521557e+02, - "cpu_time": 1.0903675299999982e+02, + "real_time": 1.0930293612182140e+02, + "cpu_time": 1.0930234899999914e+02, "time_unit": "ms" }, { @@ -102,8 +102,8 @@ "repetition_index": 0, "threads": 1, "iterations": 1, - "real_time": 1.1676310375332832e+02, - "cpu_time": 1.1675743199999999e+02, + "real_time": 1.1755896359682083e+02, + "cpu_time": 1.1755820300000065e+02, "time_unit": "ms" }, { @@ -116,8 +116,8 @@ "repetition_index": 0, "threads": 1, "iterations": 1, - "real_time": 3.5137140378355980e+02, - "cpu_time": 3.5136892199999983e+02, + "real_time": 3.5555119253695011e+02, + "cpu_time": 3.5554585000000037e+02, "time_unit": "ms" }, { @@ -130,8 +130,8 @@ "repetition_index": 0, "threads": 1, "iterations": 1, - "real_time": 8.0572975799441338e+01, - "cpu_time": 3.2125255999999602e+01, + "real_time": 6.2015011906623840e+01, + "cpu_time": 3.2137073000000349e+01, "time_unit": "ms" } ] diff --git a/test_result/deeplearning/dl-op-linalg-batch-matmul-benchmark.log b/test_result/deeplearning/dl-op-linalg-batch-matmul-benchmark.log index 869f52c6..30de184d 100644 --- a/test_result/deeplearning/dl-op-linalg-batch-matmul-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-batch-matmul-benchmark.log @@ -1,4 +1,4 @@ -2025-06-01T09:43:41+00:00 +2025-07-27T14:27:13+00:00 Running ./dl-op-linalg-batch-matmul-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,18 +6,18 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.01, 1.87, 3.91 +Load Average: 1.08, 1.19, 1.30 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
--------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations --------------------------------------------------------------------------------------------- -DL_OPS_BATCH_MATMUL/Scalar/iterations:1 3529 ms 3529 ms 1 -DL_OPS_BATCH_MATMUL/AutoVectorization/iterations:1 974 ms 974 ms 1 -DL_OPS_BATCH_MATMUL/Vectorization/iterations:1 191 ms 191 ms 1 +DL_OPS_BATCH_MATMUL/Scalar/iterations:1 3536 ms 3536 ms 1 +DL_OPS_BATCH_MATMUL/AutoVectorization/iterations:1 976 ms 976 ms 1 +DL_OPS_BATCH_MATMUL/Vectorization/iterations:1 195 ms 195 ms 1 DL_OPS_BATCH_MATMUL/Tile/iterations:1 109 ms 109 ms 1 -DL_OPS_BATCH_MATMUL/SCF/iterations:1 117 ms 117 ms 1 -DL_OPS_BATCH_MATMUL/BROADCAST/iterations:1 351 ms 351 ms 1 -DL_OPS_BATCH_MATMUL/BROADCAST_OMP/iterations:1 80.6 ms 32.1 ms 1 +DL_OPS_BATCH_MATMUL/SCF/iterations:1 118 ms 118 ms 1 +DL_OPS_BATCH_MATMUL/BROADCAST/iterations:1 356 ms 356 ms 1 +DL_OPS_BATCH_MATMUL/BROADCAST_OMP/iterations:1 62.0 ms 32.1 ms 1 ---------- Verification ---------- Tile PASS SCF PASS diff --git a/test_result/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.json b/test_result/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.json index 3ea50b39..ecbac2fd 100644 --- a/test_result/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.json +++ b/test_result/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-06-01T09:43:34+00:00", + "date": "2025-07-27T14:27:06+00:00", "host_name": "4ed4bacfe45d", "executable": "./dl-op-linalg-conv2d-nchw-fchw-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.01514,1.88086,3.93066], + "load_avg": [1.10107,1.19775,1.30615], "library_build_type": "release" }, "benchmarks": [ @@ -46,8 +46,8 @@ "repetition_index": 0, "threads": 1, "iterations": 2, - "real_time": 2.8255500365048647e+02, - "cpu_time": 2.8254993450000001e+02, + "real_time": 2.8337553981691599e+02, + "cpu_time": 
2.8337256700000000e+02, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 69, - "real_time": 1.0128497186562289e+01, - "cpu_time": 1.0128337550724638e+01, + "iterations": 101, + "real_time": 6.8005450660049327e+00, + "cpu_time": 6.8004278415841570e+00, "time_unit": "ms" } ] diff --git a/test_result/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.log b/test_result/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.log index 5b0748ec..c1434203 100644 --- a/test_result/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.log @@ -1,4 +1,4 @@ -2025-06-01T09:43:34+00:00 +2025-07-27T14:27:06+00:00 Running ./dl-op-linalg-conv2d-nchw-fchw-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.02, 1.88, 3.93 +Load Average: 1.10, 1.20, 1.31 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
------------------------------------------------------------------- Benchmark Time CPU Iterations ------------------------------------------------------------------- BM_Conv2DNchwFchw_SCALAR 283 ms 283 ms 2 -BM_Conv2DNchwFchw_Im2col 10.1 ms 10.1 ms 69 +BM_Conv2DNchwFchw_Im2col 6.80 ms 6.80 ms 101 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json b/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json index 67eb81ab..09676753 100644 --- a/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json +++ b/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-06-01T09:43:38+00:00", + "date": "2025-07-27T14:27:10+00:00", "host_name": "4ed4bacfe45d", "executable": "./dl-op-linalg-conv2d-nhwc-fhwc-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.01367,1.86621,3.91455], + "load_avg": [1.09277,1.19434,1.3042], "library_build_type": "release" }, "benchmarks": [ @@ -46,8 +46,8 @@ "repetition_index": 0, "threads": 1, "iterations": 5, - "real_time": 7.2542962804436684e+01, - "cpu_time": 7.2539947599999991e+01, + "real_time": 7.2274770587682724e+01, + "cpu_time": 7.2273260000000008e+01, "time_unit": "ms" }, { @@ -60,8 +60,8 @@ "repetition_index": 0, "threads": 1, "iterations": 5, - "real_time": 9.3459729105234146e+00, - "cpu_time": 9.3459027999999940e+00, + "real_time": 9.3490019440650940e+00, + "cpu_time": 9.3489287999999995e+00, "time_unit": "ms" }, { @@ -74,8 +74,8 @@ "repetition_index": 0, "threads": 1, "iterations": 5, - "real_time": 1.7330713570117950e+00, - "cpu_time": 1.7330618000000020e+00, + "real_time": 1.8200688064098358e+00, + "cpu_time": 1.8200498000000009e+00, "time_unit": "ms" }, { @@ -88,8 +88,8 @@ "repetition_index": 0, "threads": 1, "iterations": 5, - "real_time": 1.7253447324037552e+00, - 
"cpu_time": 1.7253329999999956e+00, + "real_time": 1.8165208399295807e+00, + "cpu_time": 1.8150957999999995e+00, "time_unit": "ms" } ] diff --git a/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.log b/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.log index d2f0e099..78da0d68 100644 --- a/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.log @@ -1,4 +1,4 @@ -2025-06-01T09:43:38+00:00 +2025-07-27T14:27:10+00:00 Running ./dl-op-linalg-conv2d-nhwc-fhwc-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,15 +6,15 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.01, 1.87, 3.91 +Load Average: 1.09, 1.19, 1.30 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. --------------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations --------------------------------------------------------------------------------------------------- -DL_OPS_CONV_2D_NHWC_FHWC/scalar/iterations:5 72.5 ms 72.5 ms 5 +DL_OPS_CONV_2D_NHWC_FHWC/scalar/iterations:5 72.3 ms 72.3 ms 5 DL_OPS_CONV_2D_NHWC_FHWC/auto_vectorization/iterations:5 9.35 ms 9.35 ms 5 -DL_OPS_CONV_2D_NHWC_FHWC/vectorization/iterations:5 1.73 ms 1.73 ms 5 -DL_OPS_CONV_2D_NHWC_FHWC/vec_tile/iterations:5 1.73 ms 1.73 ms 5 +DL_OPS_CONV_2D_NHWC_FHWC/vectorization/iterations:5 1.82 ms 1.82 ms 5 +DL_OPS_CONV_2D_NHWC_FHWC/vec_tile/iterations:5 1.82 ms 1.82 ms 5 ---------- Verification ---------- auto_vectorization PASS vectorization PASS diff --git a/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json b/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json index 02682d22..2e56de32 100644 --- a/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json +++ 
b/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-06-01T09:43:36+00:00", + "date": "2025-07-27T14:27:08+00:00", "host_name": "4ed4bacfe45d", "executable": "./dl-op-linalg-conv2d-nhwc-hwcf-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.01514,1.88086,3.93066], + "load_avg": [1.09277,1.19434,1.3042], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 21, - "real_time": 3.2353343175990240e+01, - "cpu_time": 3.2352136285714280e+01, + "iterations": 22, + "real_time": 3.2280084253712133e+01, + "cpu_time": 3.2279513863636353e+01, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 114, - "real_time": 6.1570262176948685e+00, - "cpu_time": 6.1568676929824564e+00, + "iterations": 113, + "real_time": 6.1282727339890153e+00, + "cpu_time": 6.1282270707964628e+00, "time_unit": "ms" } ] diff --git a/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.log b/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.log index 0ee7bfdf..590ee959 100644 --- a/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.log @@ -1,4 +1,4 @@ -2025-06-01T09:43:36+00:00 +2025-07-27T14:27:08+00:00 Running ./dl-op-linalg-conv2d-nhwc-hwcf-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.02, 1.88, 3.93 +Load Average: 1.09, 1.19, 1.30 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
--------------------------------------------------------------------------------- Benchmark Time CPU Iterations --------------------------------------------------------------------------------- -BM_CONV_2D_NHWC_HWCF_SCALAR 32.4 ms 32.4 ms 21 -BM_CONV_2D_NHWC_HWCF_AutoVectorization 6.16 ms 6.16 ms 114 +BM_CONV_2D_NHWC_HWCF_SCALAR 32.3 ms 32.3 ms 22 +BM_CONV_2D_NHWC_HWCF_AutoVectorization 6.13 ms 6.13 ms 113 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/test_result/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json b/test_result/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json index 482a1fac..03dc2cf5 100644 --- a/test_result/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json +++ b/test_result/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-06-01T09:43:38+00:00", + "date": "2025-07-27T14:27:11+00:00", "host_name": "4ed4bacfe45d", "executable": "./dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.01367,1.86621,3.91455], + "load_avg": [1.09277,1.19434,1.3042], "library_build_type": "release" }, "benchmarks": [ @@ -46,8 +46,8 @@ "repetition_index": 0, "threads": 1, "iterations": 5, - "real_time": 7.2686385363340378e+00, - "cpu_time": 7.2663935999999989e+00, + "real_time": 4.2500682175159454e+00, + "cpu_time": 4.2499596000000004e+00, "time_unit": "ms" }, { @@ -60,8 +60,8 @@ "repetition_index": 0, "threads": 1, "iterations": 5, - "real_time": 1.6840752214193344e+00, - "cpu_time": 1.6840592000000001e+00, + "real_time": 1.7119426280260086e+00, + "cpu_time": 1.7119346000000006e+00, "time_unit": "ms" }, { @@ -74,8 +74,8 @@ "repetition_index": 0, "threads": 1, "iterations": 5, - "real_time": 1.2709796428680420e-01, - "cpu_time": 1.2711200000000006e-01, + "real_time": 1.2489855289459229e-01, + "cpu_time": 
1.2490460000000037e-01, "time_unit": "ms" } ] diff --git a/test_result/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.log b/test_result/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.log index 33593701..89a765c1 100644 --- a/test_result/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.log @@ -1,4 +1,4 @@ -2025-06-01T09:43:38+00:00 +2025-07-27T14:27:11+00:00 Running ./dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,14 +6,14 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.01, 1.87, 3.91 +Load Average: 1.09, 1.19, 1.30 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. ------------------------------------------------------------------------------------------------------------ Benchmark Time CPU Iterations ------------------------------------------------------------------------------------------------------------ -DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/scalar/iterations:5 7.27 ms 7.27 ms 5 -DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/auto_vectorization/iterations:5 1.68 ms 1.68 ms 5 -DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/vectorization/iterations:5 0.127 ms 0.127 ms 5 +DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/scalar/iterations:5 4.25 ms 4.25 ms 5 +DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/auto_vectorization/iterations:5 1.71 ms 1.71 ms 5 +DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/vectorization/iterations:5 0.125 ms 0.125 ms 5 ---------- Verification ---------- auto_vectorization PASS vectorization PASS diff --git a/test_result/deeplearning/dl-op-linalg-mathexp-benchmark.json b/test_result/deeplearning/dl-op-linalg-mathexp-benchmark.json index d824a439..a6ccdbf2 100644 --- a/test_result/deeplearning/dl-op-linalg-mathexp-benchmark.json +++ b/test_result/deeplearning/dl-op-linalg-mathexp-benchmark.json @@ 
-1,6 +1,6 @@ { "context": { - "date": "2025-06-01T09:44:04+00:00", + "date": "2025-07-27T14:27:37+00:00", "host_name": "4ed4bacfe45d", "executable": "./dl-op-linalg-mathexp-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.00781,1.7959,3.83594], + "load_avg": [1.06055,1.17725,1.29443], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 15072, - "real_time": 4.5585291086441014e-02, - "cpu_time": 4.5584231754246279e-02, + "iterations": 15225, + "real_time": 4.5636733275133207e-02, + "cpu_time": 4.5634870213464691e-02, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 22245, - "real_time": 3.1635766251485356e-02, - "cpu_time": 3.1635336165430433e-02, + "iterations": 22248, + "real_time": 3.1553120752518572e-02, + "cpu_time": 3.1552361156058965e-02, "time_unit": "ms" } ] diff --git a/test_result/deeplearning/dl-op-linalg-mathexp-benchmark.log b/test_result/deeplearning/dl-op-linalg-mathexp-benchmark.log index bdfaa89a..808a3eed 100644 --- a/test_result/deeplearning/dl-op-linalg-mathexp-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-mathexp-benchmark.log @@ -1,4 +1,4 @@ -2025-06-01T09:44:04+00:00 +2025-07-27T14:27:37+00:00 Running ./dl-op-linalg-mathexp-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.01, 1.80, 3.84 +Load Average: 1.06, 1.18, 1.29 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
------------------------------------------------------------------- Benchmark Time CPU Iterations ------------------------------------------------------------------- -BM_EXP_SCALAR 0.046 ms 0.046 ms 15072 -BM_EXP_AutoVectorization 0.032 ms 0.032 ms 22245 +BM_EXP_SCALAR 0.046 ms 0.046 ms 15225 +BM_EXP_AutoVectorization 0.032 ms 0.032 ms 22248 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/test_result/deeplearning/dl-op-linalg-mathfpow-benchmark.json b/test_result/deeplearning/dl-op-linalg-mathfpow-benchmark.json index c3598f2a..fd613964 100644 --- a/test_result/deeplearning/dl-op-linalg-mathfpow-benchmark.json +++ b/test_result/deeplearning/dl-op-linalg-mathfpow-benchmark.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-06-01T09:44:01+00:00", + "date": "2025-07-27T14:27:33+00:00", "host_name": "4ed4bacfe45d", "executable": "./dl-op-linalg-mathfpow-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.00879,1.80957,3.85156], + "load_avg": [1.06055,1.17725,1.29443], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 8120, - "real_time": 8.5159794432042274e-02, - "cpu_time": 8.5155036699507400e-02, + "iterations": 8255, + "real_time": 8.4126440369310274e-02, + "cpu_time": 8.4123528649303461e-02, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 12142, - "real_time": 5.6896000527418576e-02, - "cpu_time": 5.6895375061769053e-02, + "iterations": 12305, + "real_time": 5.6897423940429549e-02, + "cpu_time": 5.6896244047135301e-02, "time_unit": "ms" } ] diff --git a/test_result/deeplearning/dl-op-linalg-mathfpow-benchmark.log b/test_result/deeplearning/dl-op-linalg-mathfpow-benchmark.log index 96d084ae..0d519369 100644 --- a/test_result/deeplearning/dl-op-linalg-mathfpow-benchmark.log +++ 
b/test_result/deeplearning/dl-op-linalg-mathfpow-benchmark.log @@ -1,4 +1,4 @@ -2025-06-01T09:44:01+00:00 +2025-07-27T14:27:33+00:00 Running ./dl-op-linalg-mathfpow-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.01, 1.81, 3.85 +Load Average: 1.06, 1.18, 1.29 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. -------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------- -BM_FPOW_SCALAR 0.085 ms 0.085 ms 8120 -BM_FPOW_AutoVectorization 0.057 ms 0.057 ms 12142 +BM_FPOW_SCALAR 0.084 ms 0.084 ms 8255 +BM_FPOW_AutoVectorization 0.057 ms 0.057 ms 12305 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/test_result/deeplearning/dl-op-linalg-mathrsqrt-benchmark.json b/test_result/deeplearning/dl-op-linalg-mathrsqrt-benchmark.json index 8eaf98ee..05e4c7cd 100644 --- a/test_result/deeplearning/dl-op-linalg-mathrsqrt-benchmark.json +++ b/test_result/deeplearning/dl-op-linalg-mathrsqrt-benchmark.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-06-01T09:44:02+00:00", + "date": "2025-07-27T14:27:35+00:00", "host_name": "4ed4bacfe45d", "executable": "./dl-op-linalg-mathrsqrt-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.00781,1.7959,3.83594], + "load_avg": [1.06055,1.17725,1.29443], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 9624, - "real_time": 7.2778359749172489e-02, - "cpu_time": 7.2776962281795518e-02, + "iterations": 9537, + "real_time": 7.2811122116920549e-02, + "cpu_time": 7.2809154975359128e-02, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, 
"repetition_index": 0, "threads": 1, - "iterations": 160866, - "real_time": 4.3446532387546302e-03, - "cpu_time": 4.3446116705829702e-03, + "iterations": 160927, + "real_time": 4.3499197210659290e-03, + "cpu_time": 4.3497833116879093e-03, "time_unit": "ms" } ] diff --git a/test_result/deeplearning/dl-op-linalg-mathrsqrt-benchmark.log b/test_result/deeplearning/dl-op-linalg-mathrsqrt-benchmark.log index d95e5fc4..479779e0 100644 --- a/test_result/deeplearning/dl-op-linalg-mathrsqrt-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-mathrsqrt-benchmark.log @@ -1,4 +1,4 @@ -2025-06-01T09:44:02+00:00 +2025-07-27T14:27:35+00:00 Running ./dl-op-linalg-mathrsqrt-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.01, 1.80, 3.84 +Load Average: 1.06, 1.18, 1.29 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
--------------------------------------------------------------------- Benchmark Time CPU Iterations --------------------------------------------------------------------- -BM_RSQRT_SCALAR 0.073 ms 0.073 ms 9624 -BM_RSQRT_AutoVectorization 0.004 ms 0.004 ms 160866 +BM_RSQRT_SCALAR 0.073 ms 0.073 ms 9537 +BM_RSQRT_AutoVectorization 0.004 ms 0.004 ms 160927 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/test_result/deeplearning/dl-op-linalg-matmul-benchmark.json b/test_result/deeplearning/dl-op-linalg-matmul-benchmark.json index 1683436c..1a7eb05e 100644 --- a/test_result/deeplearning/dl-op-linalg-matmul-benchmark.json +++ b/test_result/deeplearning/dl-op-linalg-matmul-benchmark.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-06-01T09:43:23+00:00", + "date": "2025-07-27T14:26:55+00:00", "host_name": "4ed4bacfe45d", "executable": "./dl-op-linalg-matmul-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.01807,1.91113,3.96289], + "load_avg": [1.03271,1.1875,1.3042], "library_build_type": "release" }, "benchmarks": [ @@ -46,8 +46,8 @@ "repetition_index": 0, "threads": 1, "iterations": 1, - "real_time": 4.0962782036513090e+03, - "cpu_time": 4.0961748020000005e+03, + "real_time": 3.9328473061323166e+03, + "cpu_time": 3.9326995480000000e+03, "time_unit": "ms" }, { @@ -60,8 +60,8 @@ "repetition_index": 0, "threads": 1, "iterations": 1, - "real_time": 3.1237379759550095e+03, - "cpu_time": 3.1235837540000002e+03, + "real_time": 3.2123229391872883e+03, + "cpu_time": 3.2121668160000004e+03, "time_unit": "ms" }, { @@ -74,8 +74,8 @@ "repetition_index": 0, "threads": 1, "iterations": 1, - "real_time": 1.0998316481709480e+02, - "cpu_time": 1.0997834299999987e+02, + "real_time": 1.1747585423290730e+02, + "cpu_time": 1.1746541299999969e+02, "time_unit": "ms" }, { @@ -88,8 +88,8 @@ "repetition_index": 0, "threads": 1, "iterations": 1, - "real_time": 5.9036938473582268e+01, - 
"cpu_time": 5.9036986999999819e+01, + "real_time": 5.9880897402763367e+01, + "cpu_time": 5.9877095000000047e+01, "time_unit": "ms" }, { @@ -102,8 +102,8 @@ "repetition_index": 0, "threads": 1, "iterations": 1, - "real_time": 2.8818020597100258e+01, - "cpu_time": 1.0956099000000386e+01, + "real_time": 2.1973790600895882e+01, + "cpu_time": 9.1101149999994746e+00, "time_unit": "ms" } ] diff --git a/test_result/deeplearning/dl-op-linalg-matmul-benchmark.log b/test_result/deeplearning/dl-op-linalg-matmul-benchmark.log index 574fb1d6..7d927f43 100644 --- a/test_result/deeplearning/dl-op-linalg-matmul-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-matmul-benchmark.log @@ -1,4 +1,4 @@ -2025-06-01T09:43:23+00:00 +2025-07-27T14:26:55+00:00 Running ./dl-op-linalg-matmul-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,16 +6,16 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.02, 1.91, 3.96 +Load Average: 1.03, 1.19, 1.30 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
------------------------------------------------------------------------------- Benchmark Time CPU Iterations ------------------------------------------------------------------------------- -DL_OPS_MATMUL/scalar_O0/iterations:1 4096 ms 4096 ms 1 -DL_OPS_MATMUL/scalar_O3/iterations:1 3124 ms 3124 ms 1 -DL_OPS_MATMUL/tile/iterations:1 110 ms 110 ms 1 -DL_OPS_MATMUL/vec/iterations:1 59.0 ms 59.0 ms 1 -DL_OPS_MATMUL/vec_omp/iterations:1 28.8 ms 11.0 ms 1 +DL_OPS_MATMUL/scalar_O0/iterations:1 3933 ms 3933 ms 1 +DL_OPS_MATMUL/scalar_O3/iterations:1 3212 ms 3212 ms 1 +DL_OPS_MATMUL/tile/iterations:1 117 ms 117 ms 1 +DL_OPS_MATMUL/vec/iterations:1 59.9 ms 59.9 ms 1 +DL_OPS_MATMUL/vec_omp/iterations:1 22.0 ms 9.11 ms 1 ---------- Verification ---------- tile PASS vec PASS diff --git a/test_result/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.json b/test_result/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.json index 593d8cbf..3ad3bd62 100644 --- a/test_result/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.json +++ b/test_result/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-06-01T09:43:38+00:00", + "date": "2025-07-27T14:27:11+00:00", "host_name": "4ed4bacfe45d", "executable": "./dl-op-linalg-pooling-nhwc-sum-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.01367,1.86621,3.91455], + "load_avg": [1.09277,1.19434,1.3042], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2993, - "real_time": 2.3285509203832921e-01, - "cpu_time": 2.3285149949883063e-01, + "iterations": 3002, + "real_time": 2.3286467260475718e-01, + "cpu_time": 2.3285699133910728e-01, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 16954, - "real_time": 4.1371273263362526e-02, - "cpu_time": 4.1370382623569658e-02, + "iterations": 16950, + 
"real_time": 4.1440313236903302e-02, + "cpu_time": 4.1438350265486736e-02, "time_unit": "ms" } ] diff --git a/test_result/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.log b/test_result/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.log index 9f3bd465..abad4aa4 100644 --- a/test_result/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.log @@ -1,4 +1,4 @@ -2025-06-01T09:43:38+00:00 +2025-07-27T14:27:11+00:00 Running ./dl-op-linalg-pooling-nhwc-sum-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.01, 1.87, 3.91 +Load Average: 1.09, 1.19, 1.30 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. -------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------- -BM_POOLING_NHWC_SUM_SCALAR 0.233 ms 0.233 ms 2993 -BM_POOLING_NHWC_SUM_AutoVectorization 0.041 ms 0.041 ms 16954 +BM_POOLING_NHWC_SUM_SCALAR 0.233 ms 0.233 ms 3002 +BM_POOLING_NHWC_SUM_AutoVectorization 0.041 ms 0.041 ms 16950 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/test_result/deeplearning/dl-op-linalg-reduceaddf-benchmark.json b/test_result/deeplearning/dl-op-linalg-reduceaddf-benchmark.json index 2b2cb37f..03a50be3 100644 --- a/test_result/deeplearning/dl-op-linalg-reduceaddf-benchmark.json +++ b/test_result/deeplearning/dl-op-linalg-reduceaddf-benchmark.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-06-01T09:44:07+00:00", + "date": "2025-07-27T14:27:39+00:00", "host_name": "4ed4bacfe45d", "executable": "./dl-op-linalg-reduceaddf-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": 
[1.00684,1.78223,3.82031], + "load_avg": [1.05566,1.17383,1.29248], "library_build_type": "release" }, "benchmarks": [ diff --git a/test_result/deeplearning/dl-op-linalg-reduceaddf-benchmark.log b/test_result/deeplearning/dl-op-linalg-reduceaddf-benchmark.log index b4753f53..56c1154d 100644 --- a/test_result/deeplearning/dl-op-linalg-reduceaddf-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-reduceaddf-benchmark.log @@ -1,4 +1,4 @@ -2025-06-01T09:44:07+00:00 +2025-07-27T14:27:39+00:00 Running ./dl-op-linalg-reduceaddf-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,5 +6,5 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.01, 1.78, 3.82 +Load Average: 1.06, 1.17, 1.29 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. diff --git a/test_result/deeplearning/dl-op-linalg-reducemaxf-benchmark.json b/test_result/deeplearning/dl-op-linalg-reducemaxf-benchmark.json index 029a8bb3..433fd2b7 100644 --- a/test_result/deeplearning/dl-op-linalg-reducemaxf-benchmark.json +++ b/test_result/deeplearning/dl-op-linalg-reducemaxf-benchmark.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-06-01T09:44:07+00:00", + "date": "2025-07-27T14:27:39+00:00", "host_name": "4ed4bacfe45d", "executable": "./dl-op-linalg-reducemaxf-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.00684,1.78223,3.82031], + "load_avg": [1.05566,1.17383,1.29248], "library_build_type": "release" }, "benchmarks": [ diff --git a/test_result/deeplearning/dl-op-linalg-reducemaxf-benchmark.log b/test_result/deeplearning/dl-op-linalg-reducemaxf-benchmark.log index b3019785..ce068d49 100644 --- a/test_result/deeplearning/dl-op-linalg-reducemaxf-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-reducemaxf-benchmark.log @@ -1,4 +1,4 @@ -2025-06-01T09:44:07+00:00 +2025-07-27T14:27:39+00:00 Running ./dl-op-linalg-reducemaxf-benchmark 
Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,5 +6,5 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.01, 1.78, 3.82 +Load Average: 1.06, 1.17, 1.29 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. diff --git a/test_result/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.json b/test_result/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.json index 08e311f7..d66ad13f 100644 --- a/test_result/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.json +++ b/test_result/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-06-01T09:44:07+00:00", + "date": "2025-07-27T14:27:39+00:00", "host_name": "4ed4bacfe45d", "executable": "./dl-op-linalg-softmax-exp-sum-div-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.00684,1.78223,3.82031], + "load_avg": [1.05566,1.17383,1.29248], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 123186, - "real_time": 5.5899351610014525e-03, - "cpu_time": 5.5898217898137786e-03, + "iterations": 121646, + "real_time": 5.6556810458011850e-03, + "cpu_time": 5.6554908093977606e-03, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 182176, - "real_time": 3.8475607200048833e-03, - "cpu_time": 3.8474830768048481e-03, + "iterations": 181826, + "real_time": 3.8511731632752664e-03, + "cpu_time": 3.8510635992652325e-03, "time_unit": "ms" } ] diff --git a/test_result/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.log b/test_result/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.log index 914006d4..b85c19b7 100644 --- a/test_result/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.log +++ 
b/test_result/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.log @@ -1,4 +1,4 @@ -2025-06-01T09:44:07+00:00 +2025-07-27T14:27:39+00:00 Running ./dl-op-linalg-softmax-exp-sum-div-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.01, 1.78, 3.82 +Load Average: 1.06, 1.17, 1.29 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. -------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------- -BM_SOFTMAXEXPSUMDIV_SCALAR 0.006 ms 0.006 ms 123186 -BM_SOFTMAXEXPSUMDIV_AutoVectorization 0.004 ms 0.004 ms 182176 +BM_SOFTMAXEXPSUMDIV_SCALAR 0.006 ms 0.006 ms 121646 +BM_SOFTMAXEXPSUMDIV_AutoVectorization 0.004 ms 0.004 ms 181826 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/test_result/deeplearning/dl-op-matmul-transpose-b-benchmark.json b/test_result/deeplearning/dl-op-matmul-transpose-b-benchmark.json index dc45928e..0240521c 100644 --- a/test_result/deeplearning/dl-op-matmul-transpose-b-benchmark.json +++ b/test_result/deeplearning/dl-op-matmul-transpose-b-benchmark.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-06-01T09:44:10+00:00", + "date": "2025-07-27T14:27:42+00:00", "host_name": "4ed4bacfe45d", "executable": "./dl-op-matmul-transpose-b-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.00684,1.78223,3.82031], + "load_avg": [1.05566,1.17383,1.29248], "library_build_type": "release" }, "benchmarks": [ @@ -46,8 +46,8 @@ "repetition_index": 0, "threads": 1, "iterations": 5, - "real_time": 1.0452035043388605e+03, - "cpu_time": 1.0435922366000000e+03, + "real_time": 1.0511430144309998e+03, + "cpu_time": 1.0495184466000001e+03, 
"time_unit": "ms" }, { @@ -60,8 +60,8 @@ "repetition_index": 0, "threads": 1, "iterations": 5, - "real_time": 2.7667955197393894e+02, - "cpu_time": 2.7666573640000001e+02, + "real_time": 2.7828946411609650e+02, + "cpu_time": 2.7827974260000002e+02, "time_unit": "ms" }, { @@ -74,8 +74,8 @@ "repetition_index": 0, "threads": 1, "iterations": 5, - "real_time": 3.0310654267668724e+01, - "cpu_time": 1.9424157000000086e+01, + "real_time": 3.2301727309823036e+01, + "cpu_time": 2.2431361399999972e+01, "time_unit": "ms" }, { @@ -88,8 +88,8 @@ "repetition_index": 0, "threads": 1, "iterations": 5, - "real_time": 8.4920790046453476e+01, - "cpu_time": 8.4914559799999978e+01, + "real_time": 8.5547825321555138e+01, + "cpu_time": 8.5541207400000019e+01, "time_unit": "ms" } ] diff --git a/test_result/deeplearning/dl-op-matmul-transpose-b-benchmark.log b/test_result/deeplearning/dl-op-matmul-transpose-b-benchmark.log index 53ca4c7f..e20623f1 100644 --- a/test_result/deeplearning/dl-op-matmul-transpose-b-benchmark.log +++ b/test_result/deeplearning/dl-op-matmul-transpose-b-benchmark.log @@ -1,4 +1,4 @@ -2025-06-01T09:44:10+00:00 +2025-07-27T14:27:42+00:00 Running ./dl-op-matmul-transpose-b-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,15 +6,15 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.01, 1.78, 3.82 +Load Average: 1.06, 1.17, 1.29 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
----------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations ----------------------------------------------------------------------------------------------- -DL_OPS_MATMUL_TRANSPOSE_B/scalar_O0/iterations:5 1045 ms 1044 ms 5 -DL_OPS_MATMUL_TRANSPOSE_B/scalar_O3/iterations:5 277 ms 277 ms 5 -DL_OPS_MATMUL_TRANSPOSE_B/scalar_O3_omp/iterations:5 30.3 ms 19.4 ms 5 -DL_OPS_MATMUL_TRANSPOSE_B/vec/iterations:5 84.9 ms 84.9 ms 5 +DL_OPS_MATMUL_TRANSPOSE_B/scalar_O0/iterations:5 1051 ms 1050 ms 5 +DL_OPS_MATMUL_TRANSPOSE_B/scalar_O3/iterations:5 278 ms 278 ms 5 +DL_OPS_MATMUL_TRANSPOSE_B/scalar_O3_omp/iterations:5 32.3 ms 22.4 ms 5 +DL_OPS_MATMUL_TRANSPOSE_B/vec/iterations:5 85.5 ms 85.5 ms 5 ---------- Verification ---------- scalar_O3 PASS scalar_O3_omp PASS diff --git a/test_result/deeplearning/dl-op-tosa-transpose-benchmark.json b/test_result/deeplearning/dl-op-tosa-transpose-benchmark.json index 95272137..fef15b7e 100644 --- a/test_result/deeplearning/dl-op-tosa-transpose-benchmark.json +++ b/test_result/deeplearning/dl-op-tosa-transpose-benchmark.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-06-01T09:44:09+00:00", + "date": "2025-07-27T14:27:41+00:00", "host_name": "4ed4bacfe45d", "executable": "./dl-op-tosa-transpose-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.00684,1.78223,3.82031], + "load_avg": [1.05566,1.17383,1.29248], "library_build_type": "release" }, "benchmarks": [ @@ -46,8 +46,8 @@ "repetition_index": 0, "threads": 1, "iterations": 5, - "real_time": 2.4949089437723160e+01, - "cpu_time": 1.7676270800000001e+01, + "real_time": 2.6390058174729347e+01, + "cpu_time": 2.1415277600000003e+01, "time_unit": "ms" }, { @@ -60,8 +60,8 @@ "repetition_index": 0, "threads": 1, "iterations": 5, - "real_time": 1.7852439358830452e+01, - "cpu_time": 1.5263622600000005e+01, + "real_time": 1.8864421173930168e+01, + "cpu_time": 1.3372037199999998e+01, "time_unit": 
"ms" } ] diff --git a/test_result/deeplearning/dl-op-tosa-transpose-benchmark.log b/test_result/deeplearning/dl-op-tosa-transpose-benchmark.log index 550769b3..c9f95ebd 100644 --- a/test_result/deeplearning/dl-op-tosa-transpose-benchmark.log +++ b/test_result/deeplearning/dl-op-tosa-transpose-benchmark.log @@ -1,4 +1,4 @@ -2025-06-01T09:44:09+00:00 +2025-07-27T14:27:41+00:00 Running ./dl-op-tosa-transpose-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,12 +6,12 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.01, 1.78, 3.82 +Load Average: 1.06, 1.17, 1.29 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. ------------------------------------------------------------------------------------- Benchmark Time CPU Iterations ------------------------------------------------------------------------------------- -DL_OPS_TRANSPOSE_2D/scalar_O0/iterations:5 24.9 ms 17.7 ms 5 -DL_OPS_TRANSPOSE_2D/scalar_O3/iterations:5 17.9 ms 15.3 ms 5 +DL_OPS_TRANSPOSE_2D/scalar_O0/iterations:5 26.4 ms 21.4 ms 5 +DL_OPS_TRANSPOSE_2D/scalar_O3/iterations:5 18.9 ms 13.4 ms 5 ---------- Verification ---------- scalar_O3 PASS diff --git a/test_result/geminiprocessing/build.log b/test_result/geminiprocessing/build.log new file mode 100644 index 00000000..aa1b4a29 --- /dev/null +++ b/test_result/geminiprocessing/build.log @@ -0,0 +1,655 @@ +[1/21] Creating directories for 'project_googlebenchmark' +[2/21] Building C object benchmarks/Gemmini/Ops/MatMulOp/CMakeFiles/ExoMatMul.dir/ExoMatmul.c.o +FAILED: benchmarks/Gemmini/Ops/MatMulOp/CMakeFiles/ExoMatMul.dir/ExoMatmul.c.o +riscv64-unknown-linux-gnu-gcc -I/home/buddy-complier-workspace/buddy-mlir/build/cmake/../../frontend/Interfaces -I/home/buddy-complier-workspace/buddy-mlir/build/cmake/../../thirdparty/include -I/home/buddy-complier-workspace/buddy-benchmark/benchmarks 
-I/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include -I/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/.. -I/home/xychen/buddy-mlir/frontend/Interfaces -O3 -DNDEBUG -MD -MT benchmarks/Gemmini/Ops/MatMulOp/CMakeFiles/ExoMatMul.dir/ExoMatmul.c.o -MF benchmarks/Gemmini/Ops/MatMulOp/CMakeFiles/ExoMatMul.dir/ExoMatmul.c.o.d -o benchmarks/Gemmini/Ops/MatMulOp/CMakeFiles/ExoMatMul.dir/ExoMatmul.c.o -c /home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c: In function '_exo_matmul_4': +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:28:47: error: macro "gemmini_extended_config_ex" requires 7 arguments, but only 6 given + 28 | gemmini_extended_config_ex(WS, 0, 0, 1, 0, 0); + | ^ +In file included from /home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:23: +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:251: note: macro "gemmini_extended_config_ex" defined here + 251 | #define gemmini_extended_config_ex(dataflow, sys_act, sys_shift, relu6_shift, A_stride, A_transpose, B_transpose) \ + | +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:28:3: error: 'gemmini_extended_config_ex' undeclared (first use in this function) + 28 | gemmini_extended_config_ex(WS, 0, 0, 1, 0, 0); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:28:3: note: each undeclared identifier is reported only once for each function it appears in +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:35:18: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast] + 35 | int32_t *res = 
(int32_t*) ((uint32_t)gemm_acc_malloc (16 * 16 * 4 * 4 * sizeof(int32_t))); + | ^ +In file included from /home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:20, + from /home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:23: +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:66:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 66 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:66:9: note: in expansion of macro 'gemmini_extended_preload' + 66 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:66:119: warning: cast from pointer to integer of different size 
[-Wpointer-to-int-cast] + 66 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:66:9: note: in expansion of macro 'gemmini_extended_preload' + 66 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:67:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 67 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ 
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:67:9: note: in expansion of macro 'gemmini_extended_compute_preloaded' + 67 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:68:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 68 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ 
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:68:9: note: in expansion of macro 'gemmini_extended_preload' + 68 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:68:125: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 68 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:68:9: note: in expansion of macro 'gemmini_extended_preload' + 68 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ 
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:69:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 69 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:69:9: note: in expansion of macro 'gemmini_extended_compute_preloaded' + 69 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:70:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 70 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in 
definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:70:9: note: in expansion of macro 'gemmini_extended_preload' + 70 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:70:133: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 70 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << 
(ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:70:9: note: in expansion of macro 'gemmini_extended_preload' + 70 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:71:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 71 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:71:9: note: in expansion of macro 'gemmini_extended_compute_preloaded' + 71 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:72:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 72 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:72:9: note: in expansion of macro 'gemmini_extended_preload' + 72 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:72:133: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 72 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 
0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:72:9: note: in expansion of macro 'gemmini_extended_preload' + 72 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:73:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 73 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) 
| ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:73:9: note: in expansion of macro 'gemmini_extended_compute_preloaded' + 73 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:74:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 74 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:74:9: note: in expansion of macro 'gemmini_extended_preload' + 74 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( 
((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:74:126: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 74 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:74:9: note: in expansion of macro 'gemmini_extended_preload' + 74 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:75:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 75 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + 256)/16))), ~((uint32_t)0), (16), (16), 
16, 16); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:75:9: note: in expansion of macro 'gemmini_extended_compute_preloaded' + 75 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + 256)/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:76:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 76 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) 
| ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:76:9: note: in expansion of macro 'gemmini_extended_preload' + 76 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:76:132: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 76 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:76:9: note: in expansion of macro 'gemmini_extended_preload' + 76 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + 
((j) * (4096) + 1024 + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:77:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 77 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + 256)/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:77:9: note: in expansion of macro 'gemmini_extended_compute_preloaded' + 77 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + 256)/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:78:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 78 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * 
(1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:78:9: note: in expansion of macro 'gemmini_extended_preload' + 78 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:78:140: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 78 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ 
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:78:9: note: in expansion of macro 'gemmini_extended_preload' + 78 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:79:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 79 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + 256)/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED) + | ^~~~~~~~~~~~~~~~~~~~~~~~ 
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:79:9: note: in expansion of macro 'gemmini_extended_compute_preloaded' + 79 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + 256)/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:80:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 80 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:80:9: note: in expansion of macro 'gemmini_extended_preload' + 80 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ 
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:80:140: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 80 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:80:9: note: in expansion of macro 'gemmini_extended_preload' + 80 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:81:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 81 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + 256)/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^ 
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:81:9: note: in expansion of macro 'gemmini_extended_compute_preloaded' + 81 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + 256)/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:82:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 82 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | 
((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:82:9: note: in expansion of macro 'gemmini_extended_preload' + 82 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:82:134: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 82 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:82:9: note: in expansion of macro 'gemmini_extended_preload' + 82 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * 
(4096) + (2) * (1024))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:83:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 83 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (2) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:83:9: note: in expansion of macro 'gemmini_extended_compute_preloaded' + 83 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (2) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:84:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 84 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * 
(1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:84:9: note: in expansion of macro 'gemmini_extended_preload' + 84 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:84:140: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 84 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in 
expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:84:9: note: in expansion of macro 'gemmini_extended_preload' + 84 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:85:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 85 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (2) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:85:9: note: in expansion of macro 'gemmini_extended_compute_preloaded' + 85 | 
gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (2) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:86:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 86 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:86:9: note: in expansion of macro 'gemmini_extended_preload' + 86 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:86:148: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] 
+ 86 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:86:9: note: in expansion of macro 'gemmini_extended_preload' + 86 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:87:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 87 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (2) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : 
"r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:87:9: note: in expansion of macro 'gemmini_extended_compute_preloaded' + 87 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (2) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:88:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 88 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | 
^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:88:9: note: in expansion of macro 'gemmini_extended_preload' + 88 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:88:148: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 88 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:88:9: note: in expansion of macro 'gemmini_extended_preload' + 88 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) 
+ ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:89:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 89 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (2) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:89:9: note: in expansion of macro 'gemmini_extended_compute_preloaded' + 89 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (2) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:90:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 90 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ 
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:90:9: note: in expansion of macro 'gemmini_extended_preload' + 90 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:90:134: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 90 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, 
((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:90:9: note: in expansion of macro 'gemmini_extended_preload' + 90 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:91:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 91 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (3) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:91:9: note: in expansion of macro 'gemmini_extended_compute_preloaded' + 91 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (3) 
* (256))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:92:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 92 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:92:9: note: in expansion of macro 'gemmini_extended_preload' + 92 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:92:140: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 92 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + 256)/16))), 
(uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:92:9: note: in expansion of macro 'gemmini_extended_preload' + 92 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:93:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 93 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (3) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 
'ROCC_INSTRUCTION_RS1_RS2' + 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:93:9: note: in expansion of macro 'gemmini_extended_compute_preloaded' + 93 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (3) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:94:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 94 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:94:9: note: in expansion of macro 'gemmini_extended_preload' + 94 | 
gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:94:148: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 94 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:94:9: note: in expansion of macro 'gemmini_extended_preload' + 94 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ 
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:95:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 95 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (3) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:95:9: note: in expansion of macro 'gemmini_extended_compute_preloaded' + 95 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (3) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:96:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 96 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ 
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:96:9: note: in expansion of macro 'gemmini_extended_preload' + 96 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:96:148: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 96 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 
'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:96:9: note: in expansion of macro 'gemmini_extended_preload' + 96 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:97:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 97 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (3) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:97:9: note: in expansion of macro 'gemmini_extended_compute_preloaded' + 97 | 
gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (3) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:98:89: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 98 | gemmini_extended_mvout( ((uint64_t) &C[(16 * i + 3136 * io) * (256) + 64 * j]), (uint32_t) &*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16)), (16), (16) ); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:212:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 212 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, dram_addr, ((uint64_t)(rows) << (ADDR_LEN + 16)) | ((uint64_t)(cols) << ADDR_LEN) | (uint64_t)(spad_addr), k_MVOUT) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:98:9: note: in expansion of macro 'gemmini_extended_mvout' + 98 | gemmini_extended_mvout( ((uint64_t) &C[(16 * i + 3136 * io) * (256) + 64 * j]), (uint32_t) &*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16)), (16), (16) ); + | ^~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:99:94: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 99 | gemmini_extended_mvout( ((uint64_t) &C[(16 * i + 3136 * io) * (256) + 16 + 64 * j]), (uint32_t) &*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16)), (16), (16) ); + | ^ 
+/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:212:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 212 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, dram_addr, ((uint64_t)(rows) << (ADDR_LEN + 16)) | ((uint64_t)(cols) << ADDR_LEN) | (uint64_t)(spad_addr), k_MVOUT) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:99:9: note: in expansion of macro 'gemmini_extended_mvout' + 99 | gemmini_extended_mvout( ((uint64_t) &C[(16 * i + 3136 * io) * (256) + 16 + 64 * j]), (uint32_t) &*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16)), (16), (16) ); + | ^~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:100:94: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 100 | gemmini_extended_mvout( ((uint64_t) &C[(16 * i + 3136 * io) * (256) + 32 + 64 * j]), (uint32_t) &*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16)), (16), (16) ); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:212:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 212 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, dram_addr, ((uint64_t)(rows) << (ADDR_LEN + 16)) | ((uint64_t)(cols) << ADDR_LEN) | (uint64_t)(spad_addr), k_MVOUT) + | ^~~~~~~~~~~~~~~~~~~~~~~~ 
+/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:100:9: note: in expansion of macro 'gemmini_extended_mvout' + 100 | gemmini_extended_mvout( ((uint64_t) &C[(16 * i + 3136 * io) * (256) + 32 + 64 * j]), (uint32_t) &*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16)), (16), (16) ); + | ^~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:101:94: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 101 | gemmini_extended_mvout( ((uint64_t) &C[(16 * i + 3136 * io) * (256) + 48 + 64 * j]), (uint32_t) &*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16)), (16), (16) ); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:212:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 212 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, dram_addr, ((uint64_t)(rows) << (ADDR_LEN + 16)) | ((uint64_t)(cols) << ADDR_LEN) | (uint64_t)(spad_addr), k_MVOUT) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:101:9: note: in expansion of macro 'gemmini_extended_mvout' + 101 | gemmini_extended_mvout( ((uint64_t) &C[(16 * i + 3136 * io) * (256) + 48 + 64 * j]), (uint32_t) &*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16)), (16), (16) ); + | ^~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:105:17: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 105 | gemm_acc_free((uint32_t)(res)); + | ^ +[3/21] Building 
CXX object benchmarks/Gemmini/ResNet-101/CMakeFiles/CRunnerUtils.dir/CRunnerUtils.cpp.o +[4/21] Generating buddy_matmul.o +[5/21] Performing download step (git clone) for 'project_googlebenchmark' +Cloning into 'project_googlebenchmark'... +HEAD is now at f91b6b4 bump version to 1.6 in preparation for release +[6/21] Generating resnet-101.o +ninja: build stopped: subcommand failed. diff --git a/test_result/geminiprocessing/cmake_configure.log b/test_result/geminiprocessing/cmake_configure.log new file mode 100644 index 00000000..a3a42f37 --- /dev/null +++ b/test_result/geminiprocessing/cmake_configure.log @@ -0,0 +1,37 @@ +-- The CXX compiler identification is GNU 9.2.0 +-- The C compiler identification is GNU 9.2.0 +-- Detecting CXX compiler ABI info +-- Detecting CXX compiler ABI info - done +-- Check for working CXX compiler: /home/buddy-complier-workspace/chipyard/.conda-env/esp-tools/bin/riscv64-unknown-linux-gnu-g++ - skipped +-- Detecting CXX compile features +-- Detecting CXX compile features - done +-- Detecting C compiler ABI info +-- Detecting C compiler ABI info - done +-- Check for working C compiler: /home/buddy-complier-workspace/chipyard/.conda-env/esp-tools/bin/riscv64-unknown-linux-gnu-gcc - skipped +-- Detecting C compile features +-- Detecting C compile features - done +-- Configuring Target Architecture: avx512f +-- Configuring Target Triple: x86_64-unknown-linux-gnu +-- Configuring benchmarks: google +-- Performing Test CMAKE_HAVE_LIBC_PTHREAD +-- Performing Test CMAKE_HAVE_LIBC_PTHREAD - Failed +-- Looking for pthread_create in pthreads +-- Looking for pthread_create in pthreads - not found +-- Looking for pthread_create in pthread +-- Looking for pthread_create in pthread - found +-- Found Threads: TRUE +-- Performing Test HAVE_SSE +-- Performing Test HAVE_SSE - Failed +-- SSE support - no +-- Performing Test HAVE_AVX2 +-- Performing Test HAVE_AVX2 - Failed +-- AVX2 support - no +-- Performing Test HAVE_AVX512 +-- Performing Test HAVE_AVX512 
- Failed +-- AVX512 support - no +-- Performing Test HAVE_NEON +-- Performing Test HAVE_NEON - Failed +-- Arm Neon support - no +-- Configuring done +-- Generating done +-- Build files have been written to: /home/buddy-complier-workspace/buddy-benchmark/build From 96302f6ec5246911b00bc5fe76f76781d2e7c64d Mon Sep 17 00:00:00 2001 From: LIUQyou <1343451020@qq.com> Date: Sun, 27 Jul 2025 16:29:15 +0000 Subject: [PATCH 30/52] test --- .github/workflows/bench.yml | 37 +++++++++++++++++-- .../{2025-06-01 => 2025-07-27}/index.html | 0 site/benchmarks/latest/index.html | 2 +- site/deeplearning/dl-layer-ffn-benchmark.html | 14 +++---- .../dl-layer-rmsnorm-benchmark.html | 14 +++---- .../dl-layer-selfattention-benchmark.html | 10 ++--- .../dl-model-lenet-benchmark.html | 16 ++++---- .../dl-model-mobilenetv3-benchmark.html | 14 +++---- .../dl-model-resnet18-benchmark.html | 14 +++---- .../dl-model-tinyllama-benchmark.html | 18 ++++----- .../dl-model-whisper-benchmark.html | 14 +++---- .../dl-op-linalg-arithaddf-benchmark.html | 14 +++---- .../dl-op-linalg-arithdivf-benchmark.html | 14 +++---- .../dl-op-linalg-arithmulf-benchmark.html | 14 +++---- .../dl-op-linalg-arithnegf-benchmark.html | 14 +++---- .../dl-op-linalg-arithsubf-benchmark.html | 14 +++---- .../dl-op-linalg-batch-matmul-benchmark.html | 30 +++++++-------- ...-op-linalg-conv2d-nchw-fchw-benchmark.html | 10 ++--- ...-op-linalg-conv2d-nhwc-fhwc-benchmark.html | 18 ++++----- ...-op-linalg-conv2d-nhwc-hwcf-benchmark.html | 14 +++---- ...-depthwise-conv-2d-nhwc-hwc-benchmark.html | 18 ++++----- .../dl-op-linalg-mathexp-benchmark.html | 14 +++---- .../dl-op-linalg-mathfpow-benchmark.html | 14 +++---- .../dl-op-linalg-mathrsqrt-benchmark.html | 14 +++---- .../dl-op-linalg-matmul-benchmark.html | 26 ++++++------- ...-op-linalg-pooling-nhwc-sum-benchmark.html | 14 +++---- .../dl-op-linalg-reduceaddf-benchmark.html | 6 +-- .../dl-op-linalg-reducemaxf-benchmark.html | 6 +-- ...-linalg-softmax-exp-sum-div-benchmark.html | 14 
+++---- .../dl-op-matmul-transpose-b-benchmark.html | 22 +++++------ .../dl-op-tosa-transpose-benchmark.html | 14 +++---- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- site/vectorization/vectorization_matrix.html | 2 +- 60 files changed, 273 insertions(+), 242 deletions(-) rename site/benchmarks/{2025-06-01 => 2025-07-27}/index.html (100%) diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml index c1df8c03..8bec6144 100755 --- a/.github/workflows/bench.yml +++ b/.github/workflows/bench.yml @@ -34,6 +34,13 @@ jobs: - name: Build & run 
benchmarks run: | /home/quliu/buddy-complier-workspace/run_docker.sh + # ------------------------------------------------------------ + # 2½) decide which date folder we’re about to publish + # ------------------------------------------------------------ + - name: Set BENCH_DATE env var + run: | + # Pick today's date (YYYY-MM-DD) - same pattern your scripts use + echo "BENCH_DATE=$(date +'%Y-%m-%d')" >> "$GITHUB_ENV" # ------------------------------------------------------------ # 3-5) (UNCHANGED) upload raw logs, convert to HTML, deploy Pages @@ -73,15 +80,39 @@ jobs: with: path: /home/quliu/buddy-complier-workspace/buddy-benchmark/site - - name: Push to buddy-compiler.github.io + - name: Build index.html for this run + run: | + run_root="$BENCH_DIR" + cat > "$run_root/index.html" <<'EOF' + + + Buddy-Benchmark run +

            Benchmark results

            +
              + {% for f in site.static_files %} + {% if f.path contains page.url and f.extname == ".html" + and f.name != "index.html" %} +
            • {{ f.name }}
            • + {% endif %} + {% endfor %} +
            + EOF + + - name: Push benchmark results uses: peaceiris/actions-gh-pages@v4 if: github.event_name == 'push' && github.ref == 'refs/heads/main' with: personal_token: ${{ secrets.BUDDY_SITE_PAT }} external_repository: buddy-compiler/buddy-compiler.github.io - publish_dir: /home/quliu/buddy-complier-workspace/buddy-benchmark/site + + # POINT publish_dir *at the date folder itself* … + publish_dir: /home/quliu/buddy-complier-workspace/buddy-benchmark/site/benchmarks/${{ env.BENCH_DATE }} + + # … and mount it under a single benchmarks// folder on Pages destination_dir: benchmarks/${{ github.sha }} + publish_branch: master keep_files: true - commit_message: Deploy benchmark results for ${{ github.sha }} (from ${{ github.repository }}) + enable_jekyll: true # do NOT let the action create .nojekyll + commit_message: "Deploy benchmark results for ${{ github.sha }}" diff --git a/site/benchmarks/2025-06-01/index.html b/site/benchmarks/2025-07-27/index.html similarity index 100% rename from site/benchmarks/2025-06-01/index.html rename to site/benchmarks/2025-07-27/index.html diff --git a/site/benchmarks/latest/index.html b/site/benchmarks/latest/index.html index 929219d8..78e7713b 100644 --- a/site/benchmarks/latest/index.html +++ b/site/benchmarks/latest/index.html @@ -1 +1 @@ - + diff --git a/site/deeplearning/dl-layer-ffn-benchmark.html b/site/deeplearning/dl-layer-ffn-benchmark.html index 2b2d12fe..f10329ff 100644 --- a/site/deeplearning/dl-layer-ffn-benchmark.html +++ b/site/deeplearning/dl-layer-ffn-benchmark.html @@ -10,13 +10,13 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-layer-ffn-benchmark.json

            2025-06-01 11:51:18 UTC

            +

            deeplearning/dl-layer-ffn-benchmark.json

            2025-07-27 15:40:04 UTC

            dl-layer-ffn-benchmark.json

            NameTime (ns)CPU (ns)Iterations
            MLIR_MatMul/118.818.837,302,822
            - -
            NameTime (ms)CPU (ms)Iterations
            DL_LAYER_FFN/Scalar0.06530.065310,788
            DL_LAYER_FFN/Auto_Vectorization0.0270.02725,830
            +DL_LAYER_FFN/Scalar0.06540.065410,762 +DL_LAYER_FFN/Auto_Vectorization0.02710.027125,673
            Console output -
            2025-06-01T09:43:17+00:00
            +
            2025-07-27T14:26:49+00:00
             Running ./dl-layer-ffn-benchmark
             Run on (24 X 5100 MHz CPU s)
             CPU Caches:
            @@ -24,13 +24,13 @@ 

            dl-layer-ffn-benchmark.json

            L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.02, 1.93, 3.98 +Load Average: 1.04, 1.19, 1.31 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. -------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------- -DL_LAYER_FFN/Scalar 0.065 ms 0.065 ms 10788 -DL_LAYER_FFN/Auto_Vectorization 0.027 ms 0.027 ms 25830 +DL_LAYER_FFN/Scalar 0.065 ms 0.065 ms 10762 +DL_LAYER_FFN/Auto_Vectorization 0.027 ms 0.027 ms 25673 ----------------------------------------------------------- Correctness Verification: PASS ----------------------------------------------------------- diff --git a/site/deeplearning/dl-layer-rmsnorm-benchmark.html b/site/deeplearning/dl-layer-rmsnorm-benchmark.html index 2f1c7c9b..34fcb666 100644 --- a/site/deeplearning/dl-layer-rmsnorm-benchmark.html +++ b/site/deeplearning/dl-layer-rmsnorm-benchmark.html @@ -10,13 +10,13 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-layer-rmsnorm-benchmark.json

            2025-06-01 11:51:18 UTC

            +

            deeplearning/dl-layer-rmsnorm-benchmark.json

            2025-07-27 15:40:04 UTC

            dl-layer-rmsnorm-benchmark.json

            - -
            NameTime (ms)CPU (ms)Iterations
            DL_LAYER_RMSNORM/Scalar0.001960.00196355,522
            DL_LAYER_RMSNORM/Auto_Vectorization0.0009070.000907763,038
            +DL_LAYER_RMSNORM/Scalar0.001960.00196356,202 +DL_LAYER_RMSNORM/Auto_Vectorization0.0009150.000915751,546
            Console output -
            2025-06-01T09:43:21+00:00
            +
            2025-07-27T14:26:53+00:00
             Running ./dl-layer-rmsnorm-benchmark
             Run on (24 X 5100 MHz CPU s)
             CPU Caches:
            @@ -24,13 +24,13 @@ 

            dl-layer-rmsnorm-benchmark.json

            L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.02, 1.93, 3.98 +Load Average: 1.03, 1.19, 1.30 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. ------------------------------------------------------------------------------ Benchmark Time CPU Iterations ------------------------------------------------------------------------------ -DL_LAYER_RMSNORM/Scalar 0.002 ms 0.002 ms 355522 -DL_LAYER_RMSNORM/Auto_Vectorization 0.001 ms 0.001 ms 763038 +DL_LAYER_RMSNORM/Scalar 0.002 ms 0.002 ms 356202 +DL_LAYER_RMSNORM/Auto_Vectorization 0.001 ms 0.001 ms 751546 ----------------------------------------------------------- Correctness Verification: PASS ----------------------------------------------------------- diff --git a/site/deeplearning/dl-layer-selfattention-benchmark.html b/site/deeplearning/dl-layer-selfattention-benchmark.html index e965d890..1114f932 100644 --- a/site/deeplearning/dl-layer-selfattention-benchmark.html +++ b/site/deeplearning/dl-layer-selfattention-benchmark.html @@ -10,13 +10,13 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-layer-selfattention-benchmark.json

            2025-06-01 11:51:18 UTC

            +

            deeplearning/dl-layer-selfattention-benchmark.json

            2025-07-27 15:40:04 UTC

            dl-layer-selfattention-benchmark.json

            -
            NameTime (ms)CPU (ms)Iterations
            DL_LAYER_ATTENTION/Scalar4.694.69149
            DL_LAYER_ATTENTION/Auto_Vectorization1.571.57447
            +DL_LAYER_ATTENTION/Auto_Vectorization1.571.57446
            Console output -
            2025-06-01T09:43:19+00:00
            +
            2025-07-27T14:26:51+00:00
             Running ./dl-layer-selfattention-benchmark
             Run on (24 X 5100 MHz CPU s)
             CPU Caches:
            @@ -24,13 +24,13 @@ 

            dl-layer-selfattention-benchmark.json

            L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.02, 1.93, 3.98 +Load Average: 1.04, 1.19, 1.31 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. -------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------- DL_LAYER_ATTENTION/Scalar 4.69 ms 4.69 ms 149 -DL_LAYER_ATTENTION/Auto_Vectorization 1.57 ms 1.57 ms 447 +DL_LAYER_ATTENTION/Auto_Vectorization 1.57 ms 1.57 ms 446 ----------------------------------------------------------- Correctness Verification: PASS ----------------------------------------------------------- diff --git a/site/deeplearning/dl-model-lenet-benchmark.html b/site/deeplearning/dl-model-lenet-benchmark.html index 55e8138e..d6bed7ac 100644 --- a/site/deeplearning/dl-model-lenet-benchmark.html +++ b/site/deeplearning/dl-model-lenet-benchmark.html @@ -10,13 +10,13 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-model-lenet-benchmark.json

            2025-06-01 11:51:18 UTC

            +

            deeplearning/dl-model-lenet-benchmark.json

            2025-07-27 15:40:04 UTC

            dl-model-lenet-benchmark.json

            - -
            NameTime (ms)CPU (ms)Iterations
            DL_MODEL_LENET/Auto_Vectorization0.1610.1614,427
            DL_MODEL_LENET/Buddy_Vectorization0.1360.1365,106
            +DL_MODEL_LENET/Auto_Vectorization0.1650.1654,304 +DL_MODEL_LENET/Buddy_Vectorization0.1370.1375,022
            Console output -
            2025-06-01T09:39:21+00:00
            +
            2025-07-27T14:22:52+00:00
             Running ./dl-model-lenet-benchmark
             Run on (24 X 5100 MHz CPU s)
             CPU Caches:
            @@ -24,15 +24,15 @@ 

            dl-model-lenet-benchmark.json

            L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 2.00, 3.05, 4.86 +Load Average: 1.40, 1.39, 1.40 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. ----------------------------------------------------------------------------- Benchmark Time CPU Iterations ----------------------------------------------------------------------------- -DL_MODEL_LENET/Auto_Vectorization 0.161 ms 0.161 ms 4427 -DL_MODEL_LENET/Buddy_Vectorization 0.136 ms 0.136 ms 5106 +DL_MODEL_LENET/Auto_Vectorization 0.165 ms 0.165 ms 4304 +DL_MODEL_LENET/Buddy_Vectorization 0.137 ms 0.137 ms 5022 ----------------------------------------------------------- Correctness Verification: -Transform case: PASS +Transform case: FAIL -----------------------------------------------------------
            \ No newline at end of file diff --git a/site/deeplearning/dl-model-mobilenetv3-benchmark.html b/site/deeplearning/dl-model-mobilenetv3-benchmark.html index 6be30cee..6feceebc 100644 --- a/site/deeplearning/dl-model-mobilenetv3-benchmark.html +++ b/site/deeplearning/dl-model-mobilenetv3-benchmark.html @@ -10,13 +10,13 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-model-mobilenetv3-benchmark.json

            2025-06-01 11:51:18 UTC

            +

            deeplearning/dl-model-mobilenetv3-benchmark.json

            2025-07-27 15:40:04 UTC

            dl-model-mobilenetv3-benchmark.json

            - -
            NameTime (ms)CPU (ms)Iterations
            BM_MobileNet_V3/BM_MobileNet_V3_scalar35.935.918
            BM_MobileNet_V3/BM_MobileNet_V3_conv_opt32.732.722
            +BM_MobileNet_V3/BM_MobileNet_V3_scalar37.137.119 +BM_MobileNet_V3/BM_MobileNet_V3_conv_opt333321
            Console output -
            2025-06-01T09:39:18+00:00
            +
            2025-07-27T14:22:49+00:00
             Running ./dl-model-mobilenetv3-benchmark
             Run on (24 X 5100 MHz CPU s)
             CPU Caches:
            @@ -24,13 +24,13 @@ 

            dl-model-mobilenetv3-benchmark.json

            L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 2.00, 3.05, 4.86 +Load Average: 1.40, 1.39, 1.40 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. ----------------------------------------------------------------------------------- Benchmark Time CPU Iterations ----------------------------------------------------------------------------------- -BM_MobileNet_V3/BM_MobileNet_V3_scalar 35.9 ms 35.9 ms 18 -BM_MobileNet_V3/BM_MobileNet_V3_conv_opt 32.7 ms 32.7 ms 22 +BM_MobileNet_V3/BM_MobileNet_V3_scalar 37.1 ms 37.1 ms 19 +BM_MobileNet_V3/BM_MobileNet_V3_conv_opt 33.0 ms 33.0 ms 21 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/site/deeplearning/dl-model-resnet18-benchmark.html b/site/deeplearning/dl-model-resnet18-benchmark.html index 9fb60049..145ca157 100644 --- a/site/deeplearning/dl-model-resnet18-benchmark.html +++ b/site/deeplearning/dl-model-resnet18-benchmark.html @@ -10,13 +10,13 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-model-resnet18-benchmark.json

            2025-06-01 11:51:18 UTC

            +

            deeplearning/dl-model-resnet18-benchmark.json

            2025-07-27 15:40:04 UTC

            dl-model-resnet18-benchmark.json

            - -
            NameTime (ms)CPU (ms)Iterations
            DL_MODEL_Resnet18/Auto_Vectorization7197181
            DL_MODEL_Resnet18/Buddy_Vectorization7267181
            +DL_MODEL_Resnet18/Auto_Vectorization7317231 +DL_MODEL_Resnet18/Buddy_Vectorization7297221
            Console output -
            2025-06-01T09:43:14+00:00
            +
            2025-07-27T14:26:46+00:00
             Running ./dl-model-resnet18-benchmark
             Run on (24 X 5100 MHz CPU s)
             CPU Caches:
            @@ -24,13 +24,13 @@ 

            dl-model-resnet18-benchmark.json

            L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.02, 1.94, 4.00 +Load Average: 1.04, 1.19, 1.31 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. -------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------- -DL_MODEL_Resnet18/Auto_Vectorization 719 ms 718 ms 1 -DL_MODEL_Resnet18/Buddy_Vectorization 726 ms 718 ms 1 +DL_MODEL_Resnet18/Auto_Vectorization 731 ms 723 ms 1 +DL_MODEL_Resnet18/Buddy_Vectorization 729 ms 722 ms 1 ----------------------------------------------------------- Correctness Verification: PASS ----------------------------------------------------------- diff --git a/site/deeplearning/dl-model-tinyllama-benchmark.html b/site/deeplearning/dl-model-tinyllama-benchmark.html index 6a16e291..e4f70956 100644 --- a/site/deeplearning/dl-model-tinyllama-benchmark.html +++ b/site/deeplearning/dl-model-tinyllama-benchmark.html @@ -10,14 +10,14 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-model-tinyllama-benchmark.json

            2025-06-01 11:51:18 UTC

            +

            deeplearning/dl-model-tinyllama-benchmark.json

            2025-07-27 15:40:04 UTC

            dl-model-tinyllama-benchmark.json

            - - -
            NameTime (ms)CPU (ms)Iterations
            DL_MODEL_TINYLLAMA/scalar1.73e+051.73e+051
            DL_MODEL_TINYLLAMA/matmul_opt1.05e+041.05e+041
            DL_MODEL_TINYLLAMA/matmul_opt_omp8.22e+037.61e+031
            +DL_MODEL_TINYLLAMA/scalar1.39e+051.39e+051 +DL_MODEL_TINYLLAMA/matmul_opt1e+041e+041 +DL_MODEL_TINYLLAMA/matmul_opt_omp7.84e+037.2e+031
            Console output -
            2025-06-01T09:33:00+00:00
            +
            2025-07-27T14:17:33+00:00
             Running ./dl-model-tinyllama-benchmark
             Run on (24 X 5100 MHz CPU s)
             CPU Caches:
            @@ -25,14 +25,14 @@ 

            dl-model-tinyllama-benchmark.json

            L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 2.76, 5.10, 6.18 +Load Average: 1.70, 1.92, 1.54 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. ---------------------------------------------------------------------------- Benchmark Time CPU Iterations ---------------------------------------------------------------------------- -DL_MODEL_TINYLLAMA/scalar 172638 ms 172634 ms 1 -DL_MODEL_TINYLLAMA/matmul_opt 10491 ms 10491 ms 1 -DL_MODEL_TINYLLAMA/matmul_opt_omp 8219 ms 7607 ms 1 +DL_MODEL_TINYLLAMA/scalar 139185 ms 139179 ms 1 +DL_MODEL_TINYLLAMA/matmul_opt 10038 ms 10038 ms 1 +DL_MODEL_TINYLLAMA/matmul_opt_omp 7836 ms 7201 ms 1 ---------- Verification ---------- matmul_opt PASS matmul_opt_omp PASS diff --git a/site/deeplearning/dl-model-whisper-benchmark.html b/site/deeplearning/dl-model-whisper-benchmark.html index 11fad0ea..e9f6ec48 100644 --- a/site/deeplearning/dl-model-whisper-benchmark.html +++ b/site/deeplearning/dl-model-whisper-benchmark.html @@ -10,13 +10,13 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-model-whisper-benchmark.json

            2025-06-01 11:51:18 UTC

            +

            deeplearning/dl-model-whisper-benchmark.json

            2025-07-27 15:40:04 UTC

            dl-model-whisper-benchmark.json

            - -
            NameTime (ms)CPU (ms)Iterations
            DL_MODEL_Whisper/Auto_Vectorization7.92e+047.92e+041
            DL_MODEL_Whisper/Buddy_Vectorization3.69e+043.69e+041
            +DL_MODEL_Whisper/Auto_Vectorization8e+048e+041 +DL_MODEL_Whisper/Buddy_Vectorization3.67e+043.67e+041
            Console output -
            2025-06-01T09:39:22+00:00
            +
            2025-07-27T14:22:54+00:00
             Running ./dl-model-whisper-benchmark
             Run on (24 X 5100 MHz CPU s)
             CPU Caches:
            @@ -24,13 +24,13 @@ 

            dl-model-whisper-benchmark.json

            L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 2.00, 3.04, 4.84 +Load Average: 1.45, 1.40, 1.40 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. ------------------------------------------------------------------------------- Benchmark Time CPU Iterations ------------------------------------------------------------------------------- -DL_MODEL_Whisper/Auto_Vectorization 79216 ms 79213 ms 1 -DL_MODEL_Whisper/Buddy_Vectorization 36910 ms 36904 ms 1 +DL_MODEL_Whisper/Auto_Vectorization 79983 ms 79980 ms 1 +DL_MODEL_Whisper/Buddy_Vectorization 36713 ms 36700 ms 1 ----------------------------------------------------------- Correctness Verification for Output1: PASS Correctness Verification for Output2: FAIL diff --git a/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html b/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html index 04926c66..79eb0781 100644 --- a/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html @@ -10,13 +10,13 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-arithaddf-benchmark.json

            2025-06-01 11:51:18 UTC

            +

            deeplearning/dl-op-linalg-arithaddf-benchmark.json

            2025-07-27 15:40:04 UTC

            dl-op-linalg-arithaddf-benchmark.json

            - -
            NameTime (ms)CPU (ms)Iterations
            BM_ADDF_SCALAR0.02990.029923,357
            BM_ADDF_AutoVectorization0.003990.00399164,695
            +BM_ADDF_SCALAR0.02950.029523,451 +BM_ADDF_AutoVectorization0.0040.004174,931
            Console output -
            2025-06-01T09:43:50+00:00
            +
            2025-07-27T14:27:23+00:00
             Running ./dl-op-linalg-arithaddf-benchmark
             Run on (24 X 5100 MHz CPU s)
             CPU Caches:
            @@ -24,13 +24,13 @@ 

            dl-op-linalg-arithaddf-benchmark.json

            L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.01, 1.84, 3.88 +Load Average: 1.07, 1.18, 1.30 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. -------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------- -BM_ADDF_SCALAR 0.030 ms 0.030 ms 23357 -BM_ADDF_AutoVectorization 0.004 ms 0.004 ms 164695 +BM_ADDF_SCALAR 0.030 ms 0.030 ms 23451 +BM_ADDF_AutoVectorization 0.004 ms 0.004 ms 174931 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html b/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html index 16ff5cb1..8d7a823b 100644 --- a/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html @@ -10,13 +10,13 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-arithdivf-benchmark.json

            2025-06-01 11:51:18 UTC

            +

            deeplearning/dl-op-linalg-arithdivf-benchmark.json

            2025-07-27 15:40:04 UTC

            dl-op-linalg-arithdivf-benchmark.json

            - -
            NameTime (ms)CPU (ms)Iterations
            BM_DIVF_SCALAR0.02930.029323,918
            BM_DIVF_AutoVectorization0.01030.010373,794
            +BM_DIVF_SCALAR0.02980.029823,358 +BM_DIVF_AutoVectorization0.009490.0094967,517
            Console output -
            2025-06-01T09:43:53+00:00
            +
            2025-07-27T14:27:25+00:00
             Running ./dl-op-linalg-arithdivf-benchmark
             Run on (24 X 5100 MHz CPU s)
             CPU Caches:
            @@ -24,13 +24,13 @@ 

            dl-op-linalg-arithdivf-benchmark.json

            L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.01, 1.82, 3.87 +Load Average: 1.07, 1.18, 1.30 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. -------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------- -BM_DIVF_SCALAR 0.029 ms 0.029 ms 23918 -BM_DIVF_AutoVectorization 0.010 ms 0.010 ms 73794 +BM_DIVF_SCALAR 0.030 ms 0.030 ms 23358 +BM_DIVF_AutoVectorization 0.009 ms 0.009 ms 67517 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html b/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html index a6b49a6e..3c9cf63a 100644 --- a/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html @@ -10,13 +10,13 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-arithmulf-benchmark.json

            2025-06-01 11:51:18 UTC

            +

            deeplearning/dl-op-linalg-arithmulf-benchmark.json

            2025-07-27 15:40:04 UTC

            dl-op-linalg-arithmulf-benchmark.json

            - -
            NameTime (ms)CPU (ms)Iterations
            BM_MULF_SCALAR0.02960.029623,548
            BM_MULF_AutoVectorization0.003990.00399146,698
            +BM_MULF_SCALAR0.02980.029823,441 +BM_MULF_AutoVectorization0.0040.004175,263
            Console output -
            2025-06-01T09:43:54+00:00
            +
            2025-07-27T14:27:27+00:00
             Running ./dl-op-linalg-arithmulf-benchmark
             Run on (24 X 5100 MHz CPU s)
             CPU Caches:
            @@ -24,13 +24,13 @@ 

            dl-op-linalg-arithmulf-benchmark.json

            L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.01, 1.82, 3.87 +Load Average: 1.07, 1.18, 1.30 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. -------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------- -BM_MULF_SCALAR 0.030 ms 0.030 ms 23548 -BM_MULF_AutoVectorization 0.004 ms 0.004 ms 146698 +BM_MULF_SCALAR 0.030 ms 0.030 ms 23441 +BM_MULF_AutoVectorization 0.004 ms 0.004 ms 175263 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html b/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html index a092ba39..ae10799c 100644 --- a/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html @@ -10,13 +10,13 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-arithnegf-benchmark.json

            2025-06-01 11:51:18 UTC

            +

            deeplearning/dl-op-linalg-arithnegf-benchmark.json

            2025-07-27 15:40:04 UTC

            dl-op-linalg-arithnegf-benchmark.json

            - -
            NameTime (ms)CPU (ms)Iterations
            BM_NEGF_SCALAR0.02280.022830,522
            BM_NEGF_AutoVectorization0.002490.00249279,150
            +BM_NEGF_SCALAR0.02250.022530,969 +BM_NEGF_AutoVectorization0.002460.00246277,205
            Console output -
            2025-06-01T09:43:57+00:00
            +
            2025-07-27T14:27:29+00:00
             Running ./dl-op-linalg-arithnegf-benchmark
             Run on (24 X 5100 MHz CPU s)
             CPU Caches:
            @@ -24,13 +24,13 @@ 

            dl-op-linalg-arithnegf-benchmark.json

            L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.01, 1.81, 3.85 +Load Average: 1.07, 1.18, 1.30 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. -------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------- -BM_NEGF_SCALAR 0.023 ms 0.023 ms 30522 -BM_NEGF_AutoVectorization 0.002 ms 0.002 ms 279150 +BM_NEGF_SCALAR 0.023 ms 0.023 ms 30969 +BM_NEGF_AutoVectorization 0.002 ms 0.002 ms 277205 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html b/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html index 02f5e54c..f5519bb4 100644 --- a/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html @@ -10,13 +10,13 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-arithsubf-benchmark.json

            2025-06-01 11:51:18 UTC

            +

            deeplearning/dl-op-linalg-arithsubf-benchmark.json

            2025-07-27 15:40:04 UTC

            dl-op-linalg-arithsubf-benchmark.json

            - -
            NameTime (ms)CPU (ms)Iterations
            BM_SUBF_SCALAR0.02930.029323,583
            BM_SUBF_AutoVectorization0.003990.00399175,569
            +BM_SUBF_SCALAR0.02940.029423,509 +BM_SUBF_AutoVectorization0.003990.00399175,223
            Console output -
            2025-06-01T09:43:59+00:00
            +
            2025-07-27T14:27:31+00:00
             Running ./dl-op-linalg-arithsubf-benchmark
             Run on (24 X 5100 MHz CPU s)
             CPU Caches:
            @@ -24,13 +24,13 @@ 

            dl-op-linalg-arithsubf-benchmark.json

            L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.01, 1.81, 3.85 +Load Average: 1.07, 1.18, 1.30 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. -------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------- -BM_SUBF_SCALAR 0.029 ms 0.029 ms 23583 -BM_SUBF_AutoVectorization 0.004 ms 0.004 ms 175569 +BM_SUBF_SCALAR 0.029 ms 0.029 ms 23509 +BM_SUBF_AutoVectorization 0.004 ms 0.004 ms 175223 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html b/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html index 99c81a5b..737ed6aa 100644 --- a/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html +++ b/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html @@ -10,18 +10,18 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-batch-matmul-benchmark.json

            2025-06-01 11:51:18 UTC

            +

            deeplearning/dl-op-linalg-batch-matmul-benchmark.json

            2025-07-27 15:40:04 UTC

            dl-op-linalg-batch-matmul-benchmark.json

            - - - + + + - - -
            NameTime (ms)CPU (ms)Iterations
            DL_OPS_BATCH_MATMUL/Scalar/iterations:13.53e+033.53e+031
            DL_OPS_BATCH_MATMUL/AutoVectorization/iterations:19749741
            DL_OPS_BATCH_MATMUL/Vectorization/iterations:11911911
            DL_OPS_BATCH_MATMUL/Scalar/iterations:13.54e+033.54e+031
            DL_OPS_BATCH_MATMUL/AutoVectorization/iterations:19769761
            DL_OPS_BATCH_MATMUL/Vectorization/iterations:11951951
            DL_OPS_BATCH_MATMUL/Tile/iterations:11091091
            DL_OPS_BATCH_MATMUL/SCF/iterations:11171171
            DL_OPS_BATCH_MATMUL/BROADCAST/iterations:13513511
            DL_OPS_BATCH_MATMUL/BROADCAST_OMP/iterations:180.632.11
            +DL_OPS_BATCH_MATMUL/SCF/iterations:11181181 +DL_OPS_BATCH_MATMUL/BROADCAST/iterations:13563561 +DL_OPS_BATCH_MATMUL/BROADCAST_OMP/iterations:16232.11
            Console output -
            2025-06-01T09:43:41+00:00
            +
            2025-07-27T14:27:13+00:00
             Running ./dl-op-linalg-batch-matmul-benchmark
             Run on (24 X 5100 MHz CPU s)
             CPU Caches:
            @@ -29,18 +29,18 @@ 

            dl-op-linalg-batch-matmul-benchmark.json

            L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.01, 1.87, 3.91 +Load Average: 1.08, 1.19, 1.30 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. --------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations --------------------------------------------------------------------------------------------- -DL_OPS_BATCH_MATMUL/Scalar/iterations:1 3529 ms 3529 ms 1 -DL_OPS_BATCH_MATMUL/AutoVectorization/iterations:1 974 ms 974 ms 1 -DL_OPS_BATCH_MATMUL/Vectorization/iterations:1 191 ms 191 ms 1 +DL_OPS_BATCH_MATMUL/Scalar/iterations:1 3536 ms 3536 ms 1 +DL_OPS_BATCH_MATMUL/AutoVectorization/iterations:1 976 ms 976 ms 1 +DL_OPS_BATCH_MATMUL/Vectorization/iterations:1 195 ms 195 ms 1 DL_OPS_BATCH_MATMUL/Tile/iterations:1 109 ms 109 ms 1 -DL_OPS_BATCH_MATMUL/SCF/iterations:1 117 ms 117 ms 1 -DL_OPS_BATCH_MATMUL/BROADCAST/iterations:1 351 ms 351 ms 1 -DL_OPS_BATCH_MATMUL/BROADCAST_OMP/iterations:1 80.6 ms 32.1 ms 1 +DL_OPS_BATCH_MATMUL/SCF/iterations:1 118 ms 118 ms 1 +DL_OPS_BATCH_MATMUL/BROADCAST/iterations:1 356 ms 356 ms 1 +DL_OPS_BATCH_MATMUL/BROADCAST_OMP/iterations:1 62.0 ms 32.1 ms 1 ---------- Verification ---------- Tile PASS SCF PASS diff --git a/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html b/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html index fe6b276a..61da4097 100644 --- a/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html +++ b/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html @@ -10,13 +10,13 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.json

            2025-06-01 11:51:18 UTC

            +

            deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.json

            2025-07-27 15:40:04 UTC

            dl-op-linalg-conv2d-nchw-fchw-benchmark.json

            -
            NameTime (ms)CPU (ms)Iterations
            BM_Conv2DNchwFchw_SCALAR2832832
            BM_Conv2DNchwFchw_Im2col10.110.169
            +BM_Conv2DNchwFchw_Im2col6.86.8101
            Console output -
            2025-06-01T09:43:34+00:00
            +
            2025-07-27T14:27:06+00:00
             Running ./dl-op-linalg-conv2d-nchw-fchw-benchmark
             Run on (24 X 5100 MHz CPU s)
             CPU Caches:
            @@ -24,13 +24,13 @@ 

            dl-op-linalg-conv2d-nchw-fchw-benchmark.json

            L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.02, 1.88, 3.93 +Load Average: 1.10, 1.20, 1.31 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. ------------------------------------------------------------------- Benchmark Time CPU Iterations ------------------------------------------------------------------- BM_Conv2DNchwFchw_SCALAR 283 ms 283 ms 2 -BM_Conv2DNchwFchw_Im2col 10.1 ms 10.1 ms 69 +BM_Conv2DNchwFchw_Im2col 6.80 ms 6.80 ms 101 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html b/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html index 94cfd45c..367a8b74 100644 --- a/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html +++ b/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html @@ -10,15 +10,15 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json

            2025-06-01 11:51:18 UTC

            +

            deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json

            2025-07-27 15:40:04 UTC

            dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json

            - + - -
            NameTime (ms)CPU (ms)Iterations
            DL_OPS_CONV_2D_NHWC_FHWC/scalar/iterations:572.572.55
            DL_OPS_CONV_2D_NHWC_FHWC/scalar/iterations:572.372.35
            DL_OPS_CONV_2D_NHWC_FHWC/auto_vectorization/iterations:59.359.355
            DL_OPS_CONV_2D_NHWC_FHWC/vectorization/iterations:51.731.735
            DL_OPS_CONV_2D_NHWC_FHWC/vec_tile/iterations:51.731.735
            +DL_OPS_CONV_2D_NHWC_FHWC/vectorization/iterations:51.821.825 +DL_OPS_CONV_2D_NHWC_FHWC/vec_tile/iterations:51.821.825
            Console output -
            2025-06-01T09:43:38+00:00
            +
            2025-07-27T14:27:10+00:00
             Running ./dl-op-linalg-conv2d-nhwc-fhwc-benchmark
             Run on (24 X 5100 MHz CPU s)
             CPU Caches:
            @@ -26,15 +26,15 @@ 

            dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json

            L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.01, 1.87, 3.91 +Load Average: 1.09, 1.19, 1.30 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. --------------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations --------------------------------------------------------------------------------------------------- -DL_OPS_CONV_2D_NHWC_FHWC/scalar/iterations:5 72.5 ms 72.5 ms 5 +DL_OPS_CONV_2D_NHWC_FHWC/scalar/iterations:5 72.3 ms 72.3 ms 5 DL_OPS_CONV_2D_NHWC_FHWC/auto_vectorization/iterations:5 9.35 ms 9.35 ms 5 -DL_OPS_CONV_2D_NHWC_FHWC/vectorization/iterations:5 1.73 ms 1.73 ms 5 -DL_OPS_CONV_2D_NHWC_FHWC/vec_tile/iterations:5 1.73 ms 1.73 ms 5 +DL_OPS_CONV_2D_NHWC_FHWC/vectorization/iterations:5 1.82 ms 1.82 ms 5 +DL_OPS_CONV_2D_NHWC_FHWC/vec_tile/iterations:5 1.82 ms 1.82 ms 5 ---------- Verification ---------- auto_vectorization PASS vectorization PASS diff --git a/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html b/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html index 6a7356be..a36c1b09 100644 --- a/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html @@ -10,13 +10,13 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json

            2025-06-01 11:51:18 UTC

            +

            deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json

            2025-07-27 15:40:04 UTC

            dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json

            - -
            NameTime (ms)CPU (ms)Iterations
            BM_CONV_2D_NHWC_HWCF_SCALAR32.432.421
            BM_CONV_2D_NHWC_HWCF_AutoVectorization6.166.16114
            +BM_CONV_2D_NHWC_HWCF_SCALAR32.332.322 +BM_CONV_2D_NHWC_HWCF_AutoVectorization6.136.13113
            Console output -
            2025-06-01T09:43:36+00:00
            +
            2025-07-27T14:27:08+00:00
             Running ./dl-op-linalg-conv2d-nhwc-hwcf-benchmark
             Run on (24 X 5100 MHz CPU s)
             CPU Caches:
            @@ -24,13 +24,13 @@ 

            dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json

            L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.02, 1.88, 3.93 +Load Average: 1.09, 1.19, 1.30 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. --------------------------------------------------------------------------------- Benchmark Time CPU Iterations --------------------------------------------------------------------------------- -BM_CONV_2D_NHWC_HWCF_SCALAR 32.4 ms 32.4 ms 21 -BM_CONV_2D_NHWC_HWCF_AutoVectorization 6.16 ms 6.16 ms 114 +BM_CONV_2D_NHWC_HWCF_SCALAR 32.3 ms 32.3 ms 22 +BM_CONV_2D_NHWC_HWCF_AutoVectorization 6.13 ms 6.13 ms 113 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html b/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html index 6e8dcbc6..9271eb04 100644 --- a/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html +++ b/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html @@ -10,14 +10,14 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json

            2025-06-01 11:51:18 UTC

            +

            deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json

            2025-07-27 15:40:04 UTC

            dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json

            - - -
            NameTime (ms)CPU (ms)Iterations
            DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/scalar/iterations:57.277.275
            DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/auto_vectorization/iterations:51.681.685
            DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/vectorization/iterations:50.1270.1275
            +DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/scalar/iterations:54.254.255 +DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/auto_vectorization/iterations:51.711.715 +DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/vectorization/iterations:50.1250.1255
            Console output -
            2025-06-01T09:43:38+00:00
            +
            2025-07-27T14:27:11+00:00
             Running ./dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark
             Run on (24 X 5100 MHz CPU s)
             CPU Caches:
            @@ -25,14 +25,14 @@ 

            dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json

            L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.01, 1.87, 3.91 +Load Average: 1.09, 1.19, 1.30 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. ------------------------------------------------------------------------------------------------------------ Benchmark Time CPU Iterations ------------------------------------------------------------------------------------------------------------ -DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/scalar/iterations:5 7.27 ms 7.27 ms 5 -DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/auto_vectorization/iterations:5 1.68 ms 1.68 ms 5 -DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/vectorization/iterations:5 0.127 ms 0.127 ms 5 +DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/scalar/iterations:5 4.25 ms 4.25 ms 5 +DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/auto_vectorization/iterations:5 1.71 ms 1.71 ms 5 +DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/vectorization/iterations:5 0.125 ms 0.125 ms 5 ---------- Verification ---------- auto_vectorization PASS vectorization PASS diff --git a/site/deeplearning/dl-op-linalg-mathexp-benchmark.html b/site/deeplearning/dl-op-linalg-mathexp-benchmark.html index cebd6ba5..f1bda132 100644 --- a/site/deeplearning/dl-op-linalg-mathexp-benchmark.html +++ b/site/deeplearning/dl-op-linalg-mathexp-benchmark.html @@ -10,13 +10,13 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-mathexp-benchmark.json

            2025-06-01 11:51:18 UTC

            +

            deeplearning/dl-op-linalg-mathexp-benchmark.json

            2025-07-27 15:40:04 UTC

            dl-op-linalg-mathexp-benchmark.json

            - -
            NameTime (ms)CPU (ms)Iterations
            BM_EXP_SCALAR0.04560.045615,072
            BM_EXP_AutoVectorization0.03160.031622,245
            +BM_EXP_SCALAR0.04560.045615,225 +BM_EXP_AutoVectorization0.03160.031622,248
            Console output -
            2025-06-01T09:44:04+00:00
            +
            2025-07-27T14:27:37+00:00
             Running ./dl-op-linalg-mathexp-benchmark
             Run on (24 X 5100 MHz CPU s)
             CPU Caches:
            @@ -24,13 +24,13 @@ 

            dl-op-linalg-mathexp-benchmark.json

            L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.01, 1.80, 3.84 +Load Average: 1.06, 1.18, 1.29 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. ------------------------------------------------------------------- Benchmark Time CPU Iterations ------------------------------------------------------------------- -BM_EXP_SCALAR 0.046 ms 0.046 ms 15072 -BM_EXP_AutoVectorization 0.032 ms 0.032 ms 22245 +BM_EXP_SCALAR 0.046 ms 0.046 ms 15225 +BM_EXP_AutoVectorization 0.032 ms 0.032 ms 22248 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html b/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html index 49bfdb44..8dc4b5ef 100644 --- a/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html +++ b/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html @@ -10,13 +10,13 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-mathfpow-benchmark.json

            2025-06-01 11:51:18 UTC

            +

            deeplearning/dl-op-linalg-mathfpow-benchmark.json

            2025-07-27 15:40:04 UTC

            dl-op-linalg-mathfpow-benchmark.json

            - -
            NameTime (ms)CPU (ms)Iterations
            BM_FPOW_SCALAR0.08520.08528,120
            BM_FPOW_AutoVectorization0.05690.056912,142
            +BM_FPOW_SCALAR0.08410.08418,255 +BM_FPOW_AutoVectorization0.05690.056912,305
            Console output -
            2025-06-01T09:44:01+00:00
            +
            2025-07-27T14:27:33+00:00
             Running ./dl-op-linalg-mathfpow-benchmark
             Run on (24 X 5100 MHz CPU s)
             CPU Caches:
            @@ -24,13 +24,13 @@ 

            dl-op-linalg-mathfpow-benchmark.json

            L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.01, 1.81, 3.85 +Load Average: 1.06, 1.18, 1.29 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. -------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------- -BM_FPOW_SCALAR 0.085 ms 0.085 ms 8120 -BM_FPOW_AutoVectorization 0.057 ms 0.057 ms 12142 +BM_FPOW_SCALAR 0.084 ms 0.084 ms 8255 +BM_FPOW_AutoVectorization 0.057 ms 0.057 ms 12305 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html b/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html index cf9b5f25..518b07c9 100644 --- a/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html +++ b/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html @@ -10,13 +10,13 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-mathrsqrt-benchmark.json

            2025-06-01 11:51:18 UTC

            +

            deeplearning/dl-op-linalg-mathrsqrt-benchmark.json

            2025-07-27 15:40:04 UTC

            dl-op-linalg-mathrsqrt-benchmark.json

            - -
            NameTime (ms)CPU (ms)Iterations
            BM_RSQRT_SCALAR0.07280.07289,624
            BM_RSQRT_AutoVectorization0.004340.00434160,866
            +BM_RSQRT_SCALAR0.07280.07289,537 +BM_RSQRT_AutoVectorization0.004350.00435160,927
            Console output -
            2025-06-01T09:44:02+00:00
            +
            2025-07-27T14:27:35+00:00
             Running ./dl-op-linalg-mathrsqrt-benchmark
             Run on (24 X 5100 MHz CPU s)
             CPU Caches:
            @@ -24,13 +24,13 @@ 

            dl-op-linalg-mathrsqrt-benchmark.json

            L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.01, 1.80, 3.84 +Load Average: 1.06, 1.18, 1.29 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. --------------------------------------------------------------------- Benchmark Time CPU Iterations --------------------------------------------------------------------- -BM_RSQRT_SCALAR 0.073 ms 0.073 ms 9624 -BM_RSQRT_AutoVectorization 0.004 ms 0.004 ms 160866 +BM_RSQRT_SCALAR 0.073 ms 0.073 ms 9537 +BM_RSQRT_AutoVectorization 0.004 ms 0.004 ms 160927 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/site/deeplearning/dl-op-linalg-matmul-benchmark.html b/site/deeplearning/dl-op-linalg-matmul-benchmark.html index 13690791..e28828e8 100644 --- a/site/deeplearning/dl-op-linalg-matmul-benchmark.html +++ b/site/deeplearning/dl-op-linalg-matmul-benchmark.html @@ -10,16 +10,16 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-matmul-benchmark.json

            2025-06-01 11:51:18 UTC

            +

            deeplearning/dl-op-linalg-matmul-benchmark.json

            2025-07-27 15:40:04 UTC

            dl-op-linalg-matmul-benchmark.json

            - - - - -
            NameTime (ms)CPU (ms)Iterations
            DL_OPS_MATMUL/scalar_O0/iterations:14.1e+034.1e+031
            DL_OPS_MATMUL/scalar_O3/iterations:13.12e+033.12e+031
            DL_OPS_MATMUL/tile/iterations:11101101
            DL_OPS_MATMUL/vec/iterations:159591
            DL_OPS_MATMUL/vec_omp/iterations:128.8111
            +DL_OPS_MATMUL/scalar_O0/iterations:13.93e+033.93e+031 +DL_OPS_MATMUL/scalar_O3/iterations:13.21e+033.21e+031 +DL_OPS_MATMUL/tile/iterations:11171171 +DL_OPS_MATMUL/vec/iterations:159.959.91 +DL_OPS_MATMUL/vec_omp/iterations:1229.111
            Console output -
            2025-06-01T09:43:23+00:00
            +
            2025-07-27T14:26:55+00:00
             Running ./dl-op-linalg-matmul-benchmark
             Run on (24 X 5100 MHz CPU s)
             CPU Caches:
            @@ -27,16 +27,16 @@ 

            dl-op-linalg-matmul-benchmark.json

            L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.02, 1.91, 3.96 +Load Average: 1.03, 1.19, 1.30 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. ------------------------------------------------------------------------------- Benchmark Time CPU Iterations ------------------------------------------------------------------------------- -DL_OPS_MATMUL/scalar_O0/iterations:1 4096 ms 4096 ms 1 -DL_OPS_MATMUL/scalar_O3/iterations:1 3124 ms 3124 ms 1 -DL_OPS_MATMUL/tile/iterations:1 110 ms 110 ms 1 -DL_OPS_MATMUL/vec/iterations:1 59.0 ms 59.0 ms 1 -DL_OPS_MATMUL/vec_omp/iterations:1 28.8 ms 11.0 ms 1 +DL_OPS_MATMUL/scalar_O0/iterations:1 3933 ms 3933 ms 1 +DL_OPS_MATMUL/scalar_O3/iterations:1 3212 ms 3212 ms 1 +DL_OPS_MATMUL/tile/iterations:1 117 ms 117 ms 1 +DL_OPS_MATMUL/vec/iterations:1 59.9 ms 59.9 ms 1 +DL_OPS_MATMUL/vec_omp/iterations:1 22.0 ms 9.11 ms 1 ---------- Verification ---------- tile PASS vec PASS diff --git a/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html b/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html index 5f4e3e45..93c6fc59 100644 --- a/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html +++ b/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html @@ -10,13 +10,13 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.json

            2025-06-01 11:51:18 UTC

            +

            deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.json

            2025-07-27 15:40:04 UTC

            dl-op-linalg-pooling-nhwc-sum-benchmark.json

            - -
            NameTime (ms)CPU (ms)Iterations
            BM_POOLING_NHWC_SUM_SCALAR0.2330.2332,993
            BM_POOLING_NHWC_SUM_AutoVectorization0.04140.041416,954
            +BM_POOLING_NHWC_SUM_SCALAR0.2330.2333,002 +BM_POOLING_NHWC_SUM_AutoVectorization0.04140.041416,950
            Console output -
            2025-06-01T09:43:38+00:00
            +
            2025-07-27T14:27:11+00:00
             Running ./dl-op-linalg-pooling-nhwc-sum-benchmark
             Run on (24 X 5100 MHz CPU s)
             CPU Caches:
            @@ -24,13 +24,13 @@ 

            dl-op-linalg-pooling-nhwc-sum-benchmark.json

            L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.01, 1.87, 3.91 +Load Average: 1.09, 1.19, 1.30 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. -------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------- -BM_POOLING_NHWC_SUM_SCALAR 0.233 ms 0.233 ms 2993 -BM_POOLING_NHWC_SUM_AutoVectorization 0.041 ms 0.041 ms 16954 +BM_POOLING_NHWC_SUM_SCALAR 0.233 ms 0.233 ms 3002 +BM_POOLING_NHWC_SUM_AutoVectorization 0.041 ms 0.041 ms 16950 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html b/site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html index 025cb892..2a08f572 100644 --- a/site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html @@ -10,10 +10,10 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-reduceaddf-benchmark.json

            2025-06-01 11:51:18 UTC

            +

            deeplearning/dl-op-linalg-reduceaddf-benchmark.json

            2025-07-27 15:40:04 UTC

            ⚠ FAILED: JSON parse error: Expecting value
            Console output -
            2025-06-01T09:44:07+00:00
            +
            2025-07-27T14:27:39+00:00
             Running ./dl-op-linalg-reduceaddf-benchmark
             Run on (24 X 5100 MHz CPU s)
             CPU Caches:
            @@ -21,6 +21,6 @@ 

            deeplearning/dl-op-linalg-reduceaddf-benchmark.json

            2025-06-01 11 L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.01, 1.78, 3.82 +Load Average: 1.06, 1.17, 1.29 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.

            \ No newline at end of file diff --git a/site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html b/site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html index b3d37937..8ef6bf4c 100644 --- a/site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html @@ -10,10 +10,10 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-reducemaxf-benchmark.json

            2025-06-01 11:51:18 UTC

            +

            deeplearning/dl-op-linalg-reducemaxf-benchmark.json

            2025-07-27 15:40:04 UTC

            ⚠ FAILED: JSON parse error: Expecting value
            Console output -
            2025-06-01T09:44:07+00:00
            +
            2025-07-27T14:27:39+00:00
             Running ./dl-op-linalg-reducemaxf-benchmark
             Run on (24 X 5100 MHz CPU s)
             CPU Caches:
            @@ -21,6 +21,6 @@ 

            deeplearning/dl-op-linalg-reducemaxf-benchmark.json

            2025-06-01 11 L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.01, 1.78, 3.82 +Load Average: 1.06, 1.17, 1.29 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.

            \ No newline at end of file diff --git a/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html b/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html index 70648fb8..24312d18 100644 --- a/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html +++ b/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html @@ -10,13 +10,13 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.json

            2025-06-01 11:51:18 UTC

            +

            deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.json

            2025-07-27 15:40:04 UTC

            dl-op-linalg-softmax-exp-sum-div-benchmark.json

            - -
            NameTime (ms)CPU (ms)Iterations
            BM_SOFTMAXEXPSUMDIV_SCALAR0.005590.00559123,186
            BM_SOFTMAXEXPSUMDIV_AutoVectorization0.003850.00385182,176
            +BM_SOFTMAXEXPSUMDIV_SCALAR0.005660.00566121,646 +BM_SOFTMAXEXPSUMDIV_AutoVectorization0.003850.00385181,826
            Console output -
            2025-06-01T09:44:07+00:00
            +
            2025-07-27T14:27:39+00:00
             Running ./dl-op-linalg-softmax-exp-sum-div-benchmark
             Run on (24 X 5100 MHz CPU s)
             CPU Caches:
            @@ -24,13 +24,13 @@ 

            dl-op-linalg-softmax-exp-sum-div-benchmark.json

            L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.01, 1.78, 3.82 +Load Average: 1.06, 1.17, 1.29 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. -------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------- -BM_SOFTMAXEXPSUMDIV_SCALAR 0.006 ms 0.006 ms 123186 -BM_SOFTMAXEXPSUMDIV_AutoVectorization 0.004 ms 0.004 ms 182176 +BM_SOFTMAXEXPSUMDIV_SCALAR 0.006 ms 0.006 ms 121646 +BM_SOFTMAXEXPSUMDIV_AutoVectorization 0.004 ms 0.004 ms 181826 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html b/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html index 8878068a..b3ac8835 100644 --- a/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html +++ b/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html @@ -10,15 +10,15 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-matmul-transpose-b-benchmark.json

            2025-06-01 11:51:18 UTC

            +

            deeplearning/dl-op-matmul-transpose-b-benchmark.json

            2025-07-27 15:40:04 UTC

            dl-op-matmul-transpose-b-benchmark.json

            - - - -
            NameTime (ms)CPU (ms)Iterations
            DL_OPS_MATMUL_TRANSPOSE_B/scalar_O0/iterations:51.05e+031.04e+035
            DL_OPS_MATMUL_TRANSPOSE_B/scalar_O3/iterations:52772775
            DL_OPS_MATMUL_TRANSPOSE_B/scalar_O3_omp/iterations:530.319.45
            DL_OPS_MATMUL_TRANSPOSE_B/vec/iterations:584.984.95
            +DL_OPS_MATMUL_TRANSPOSE_B/scalar_O0/iterations:51.05e+031.05e+035 +DL_OPS_MATMUL_TRANSPOSE_B/scalar_O3/iterations:52782785 +DL_OPS_MATMUL_TRANSPOSE_B/scalar_O3_omp/iterations:532.322.45 +DL_OPS_MATMUL_TRANSPOSE_B/vec/iterations:585.585.55
            Console output -
            2025-06-01T09:44:10+00:00
            +
            2025-07-27T14:27:42+00:00
             Running ./dl-op-matmul-transpose-b-benchmark
             Run on (24 X 5100 MHz CPU s)
             CPU Caches:
            @@ -26,15 +26,15 @@ 

            dl-op-matmul-transpose-b-benchmark.json

            L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.01, 1.78, 3.82 +Load Average: 1.06, 1.17, 1.29 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. ----------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations ----------------------------------------------------------------------------------------------- -DL_OPS_MATMUL_TRANSPOSE_B/scalar_O0/iterations:5 1045 ms 1044 ms 5 -DL_OPS_MATMUL_TRANSPOSE_B/scalar_O3/iterations:5 277 ms 277 ms 5 -DL_OPS_MATMUL_TRANSPOSE_B/scalar_O3_omp/iterations:5 30.3 ms 19.4 ms 5 -DL_OPS_MATMUL_TRANSPOSE_B/vec/iterations:5 84.9 ms 84.9 ms 5 +DL_OPS_MATMUL_TRANSPOSE_B/scalar_O0/iterations:5 1051 ms 1050 ms 5 +DL_OPS_MATMUL_TRANSPOSE_B/scalar_O3/iterations:5 278 ms 278 ms 5 +DL_OPS_MATMUL_TRANSPOSE_B/scalar_O3_omp/iterations:5 32.3 ms 22.4 ms 5 +DL_OPS_MATMUL_TRANSPOSE_B/vec/iterations:5 85.5 ms 85.5 ms 5 ---------- Verification ---------- scalar_O3 PASS scalar_O3_omp PASS diff --git a/site/deeplearning/dl-op-tosa-transpose-benchmark.html b/site/deeplearning/dl-op-tosa-transpose-benchmark.html index e1189cd9..8a00aa6d 100644 --- a/site/deeplearning/dl-op-tosa-transpose-benchmark.html +++ b/site/deeplearning/dl-op-tosa-transpose-benchmark.html @@ -10,13 +10,13 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-tosa-transpose-benchmark.json

            2025-06-01 11:51:18 UTC

            +

            deeplearning/dl-op-tosa-transpose-benchmark.json

            2025-07-27 15:40:04 UTC

            dl-op-tosa-transpose-benchmark.json

            - -
            NameTime (ms)CPU (ms)Iterations
            DL_OPS_TRANSPOSE_2D/scalar_O0/iterations:524.917.75
            DL_OPS_TRANSPOSE_2D/scalar_O3/iterations:517.915.35
            +DL_OPS_TRANSPOSE_2D/scalar_O0/iterations:526.421.45 +DL_OPS_TRANSPOSE_2D/scalar_O3/iterations:518.913.45
            Console output -
            2025-06-01T09:44:09+00:00
            +
            2025-07-27T14:27:41+00:00
             Running ./dl-op-tosa-transpose-benchmark
             Run on (24 X 5100 MHz CPU s)
             CPU Caches:
            @@ -24,13 +24,13 @@ 

            dl-op-tosa-transpose-benchmark.json

            L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.01, 1.78, 3.82 +Load Average: 1.06, 1.17, 1.29 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. ------------------------------------------------------------------------------------- Benchmark Time CPU Iterations ------------------------------------------------------------------------------------- -DL_OPS_TRANSPOSE_2D/scalar_O0/iterations:5 24.9 ms 17.7 ms 5 -DL_OPS_TRANSPOSE_2D/scalar_O3/iterations:5 17.9 ms 15.3 ms 5 +DL_OPS_TRANSPOSE_2D/scalar_O0/iterations:5 26.4 ms 21.4 ms 5 +DL_OPS_TRANSPOSE_2D/scalar_O3/iterations:5 18.9 ms 13.4 ms 5 ---------- Verification ---------- scalar_O3 PASS
            \ No newline at end of file diff --git a/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index a7170e4e..51daafa4 100644 --- a/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:51:18 UTC

            +

            imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 15:40:04 UTC

            AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            diff --git a/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index c3fe25af..fa5b4d1a 100644 --- a/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:51:18 UTC

            +

            imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 15:40:04 UTC

            AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.884.88143
            diff --git a/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 78583280..7f6a92e2 100644 --- a/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:51:18 UTC

            +

            imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 15:40:04 UTC

            AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.884.88143
            diff --git a/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 8de2abfa..46272c0e 100644 --- a/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:51:18 UTC

            +

            imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 15:40:04 UTC

            AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.511.561
            diff --git a/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index e63c263e..9753d4b9 100644 --- a/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:51:18 UTC

            +

            imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 15:40:04 UTC

            AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.611.661
            diff --git a/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 0578ab54..399e3c28 100644 --- a/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:51:18 UTC

            +

            imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 15:40:04 UTC

            AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.874.87144
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index fcecaf28..44bde052 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:51:18 UTC

            +

            imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 15:40:04 UTC

            AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.94.9144
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 5863c25f..3ba8a292 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:51:18 UTC

            +

            imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 15:40:04 UTC

            AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.774.77147
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 184cabd9..e3656280 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:51:18 UTC

            +

            imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 15:40:04 UTC

            AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.914.91143
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 05b9941f..bc33494e 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:51:18 UTC

            +

            imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 15:40:04 UTC

            AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.611.660
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index b5c86a18..ccb70e21 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:51:18 UTC

            +

            imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 15:40:04 UTC

            AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.511.560
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 5b96ad7d..0532a137 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:51:18 UTC

            +

            imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 15:40:04 UTC

            AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/121.521.531
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 6091af1b..27f81737 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:51:18 UTC

            +

            imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 15:40:04 UTC

            AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/121.621.632
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 822c611d..b3702bed 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:51:18 UTC

            +

            imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 15:40:04 UTC

            AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/134.234.221
            diff --git a/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 51800cd5..bede4f4d 100644 --- a/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:51:18 UTC

            +

            imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 15:40:04 UTC

            SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/134.334.320
            diff --git a/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 06b583b9..864c9816 100644 --- a/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:51:18 UTC

            +

            imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 15:40:04 UTC

            SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.744.74148
            diff --git a/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 61778e2a..8b667850 100644 --- a/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:51:18 UTC

            +

            imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 15:40:04 UTC

            SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.884.88144
            diff --git a/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 6704c12e..1f19bf32 100644 --- a/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:51:18 UTC

            +

            imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 15:40:04 UTC

            SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.511.560
            diff --git a/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 7a7ba37b..83c6af19 100644 --- a/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:51:18 UTC

            +

            imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 15:40:04 UTC

            SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.611.661
            diff --git a/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 1cf7bd35..07a4fe48 100644 --- a/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:51:18 UTC

            +

            imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 15:40:04 UTC

            SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/15.035.03139
            diff --git a/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 76402b2a..e1722229 100644 --- a/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:51:18 UTC

            +

            imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 15:40:04 UTC

            SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.894.89143
            diff --git a/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 1547a6d4..c085a37f 100644 --- a/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:51:18 UTC

            +

            imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 15:40:04 UTC

            SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.894.89143
            diff --git a/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 4b980272..21450047 100644 --- a/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:51:18 UTC

            +

            imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 15:40:04 UTC

            SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.874.87144
            diff --git a/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index ded7d114..f74189cf 100644 --- a/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:51:18 UTC

            +

            imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 15:40:04 UTC

            SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.511.561
            diff --git a/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index b1a3173e..14bb2f84 100644 --- a/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:51:18 UTC

            +

            imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 15:40:04 UTC

            SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.611.661
            diff --git a/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 8ece84be..79e9a174 100644 --- a/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:51:18 UTC

            +

            imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 15:40:04 UTC

            SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/121.621.631
            diff --git a/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 2e6263cd..a6af2604 100644 --- a/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-06-01 11:51:18 UTC

            +

            imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 15:40:04 UTC

            SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/121.521.532
            diff --git a/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index cb9f1665..675188a3 100644 --- a/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-06-01 11:51:18 UTC

            +

            imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 15:40:04 UTC

            SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/141.541.517
            diff --git a/site/vectorization/vectorization_matrix.html b/site/vectorization/vectorization_matrix.html index 92c881d4..0759ca4d 100644 --- a/site/vectorization/vectorization_matrix.html +++ b/site/vectorization/vectorization_matrix.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            vectorization/vectorization_matrix.json

            2025-06-01 11:51:18 UTC

            +

            vectorization/vectorization_matrix.json

            2025-07-27 15:40:04 UTC

            vectorization_matrix.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/134.234.220
            From e80d1a702fbd3c02152a4a29b61a2cf4fb5e8796 Mon Sep 17 00:00:00 2001 From: LIUQyou <1343451020@qq.com> Date: Sun, 27 Jul 2025 16:33:57 +0000 Subject: [PATCH 31/52] test --- .github/workflows/bench.yml | 29 +++++++++---------- site/deeplearning/dl-layer-ffn-benchmark.html | 2 +- .../dl-layer-rmsnorm-benchmark.html | 2 +- .../dl-layer-selfattention-benchmark.html | 2 +- .../dl-model-lenet-benchmark.html | 2 +- .../dl-model-mobilenetv3-benchmark.html | 2 +- .../dl-model-resnet18-benchmark.html | 2 +- .../dl-model-tinyllama-benchmark.html | 2 +- .../dl-model-whisper-benchmark.html | 2 +- .../dl-op-linalg-arithaddf-benchmark.html | 2 +- .../dl-op-linalg-arithdivf-benchmark.html | 2 +- .../dl-op-linalg-arithmulf-benchmark.html | 2 +- .../dl-op-linalg-arithnegf-benchmark.html | 2 +- .../dl-op-linalg-arithsubf-benchmark.html | 2 +- .../dl-op-linalg-batch-matmul-benchmark.html | 2 +- ...-op-linalg-conv2d-nchw-fchw-benchmark.html | 2 +- ...-op-linalg-conv2d-nhwc-fhwc-benchmark.html | 2 +- ...-op-linalg-conv2d-nhwc-hwcf-benchmark.html | 2 +- ...-depthwise-conv-2d-nhwc-hwc-benchmark.html | 2 +- .../dl-op-linalg-mathexp-benchmark.html | 2 +- .../dl-op-linalg-mathfpow-benchmark.html | 2 +- .../dl-op-linalg-mathrsqrt-benchmark.html | 2 +- .../dl-op-linalg-matmul-benchmark.html | 2 +- ...-op-linalg-pooling-nhwc-sum-benchmark.html | 2 +- .../dl-op-linalg-reduceaddf-benchmark.html | 2 +- .../dl-op-linalg-reducemaxf-benchmark.html | 2 +- ...-linalg-softmax-exp-sum-div-benchmark.html | 2 +- .../dl-op-matmul-transpose-b-benchmark.html | 2 +- .../dl-op-tosa-transpose-benchmark.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- 
...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- site/vectorization/vectorization_matrix.html | 2 +- 58 files changed, 70 insertions(+), 73 deletions(-) diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml index 8bec6144..1e7608df 100755 --- a/.github/workflows/bench.yml +++ b/.github/workflows/bench.yml @@ -37,10 +37,11 @@ jobs: # ------------------------------------------------------------ # 2½) decide which date folder we’re about to publish # ------------------------------------------------------------ - - name: Set BENCH_DATE env var + - name: Set BENCH_DATE and BENCH_DIR run: | - # Pick today's date (YYYY-MM-DD) - same pattern your scripts use - echo "BENCH_DATE=$(date +'%Y-%m-%d')" >> "$GITHUB_ENV" + bench_date=$(date +'%Y-%m-%d') + echo "BENCH_DATE=$bench_date" >> "$GITHUB_ENV" + echo "BENCH_DIR=/home/quliu/buddy-complier-workspace/buddy-benchmark/site/benchmarks/$bench_date" >> "$GITHUB_ENV" # 
------------------------------------------------------------ # 3-5) (UNCHANGED) upload raw logs, convert to HTML, deploy Pages @@ -82,16 +83,18 @@ jobs: - name: Build index.html for this run run: | - run_root="$BENCH_DIR" + run_root="${{ env.BENCH_DIR }}" + mkdir -p "$run_root" # just in case cat > "$run_root/index.html" <<'EOF' - - - Buddy-Benchmark run + --- + layout: default + title: Benchmark run + ---

            Benchmark results

              {% for f in site.static_files %} {% if f.path contains page.url and f.extname == ".html" - and f.name != "index.html" %} + and f.name != "index.html" %}
            • {{ f.name }}
            • {% endif %} {% endfor %} @@ -100,19 +103,13 @@ jobs: - name: Push benchmark results uses: peaceiris/actions-gh-pages@v4 - if: github.event_name == 'push' && github.ref == 'refs/heads/main' with: personal_token: ${{ secrets.BUDDY_SITE_PAT }} external_repository: buddy-compiler/buddy-compiler.github.io - - # POINT publish_dir *at the date folder itself* … - publish_dir: /home/quliu/buddy-complier-workspace/buddy-benchmark/site/benchmarks/${{ env.BENCH_DATE }} - - # … and mount it under a single benchmarks// folder on Pages + publish_dir: "${{ env.BENCH_DIR }}" # the directory we just filled destination_dir: benchmarks/${{ github.sha }} - publish_branch: master keep_files: true - enable_jekyll: true # do NOT let the action create .nojekyll + enable_jekyll: true # *no* .nojekyll commit_message: "Deploy benchmark results for ${{ github.sha }}" diff --git a/site/deeplearning/dl-layer-ffn-benchmark.html b/site/deeplearning/dl-layer-ffn-benchmark.html index f10329ff..fc2f34b1 100644 --- a/site/deeplearning/dl-layer-ffn-benchmark.html +++ b/site/deeplearning/dl-layer-ffn-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

              deeplearning/dl-layer-ffn-benchmark.json

              2025-07-27 15:40:04 UTC

              +

              deeplearning/dl-layer-ffn-benchmark.json

              2025-07-27 16:29:45 UTC

              dl-layer-ffn-benchmark.json

            NameTime (ns)CPU (ns)Iterations
            MLIR_MatMul/118.818.837,302,822
            diff --git a/site/deeplearning/dl-layer-rmsnorm-benchmark.html b/site/deeplearning/dl-layer-rmsnorm-benchmark.html index 34fcb666..2eb5872f 100644 --- a/site/deeplearning/dl-layer-rmsnorm-benchmark.html +++ b/site/deeplearning/dl-layer-rmsnorm-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-layer-rmsnorm-benchmark.json

            2025-07-27 15:40:04 UTC

            +

            deeplearning/dl-layer-rmsnorm-benchmark.json

            2025-07-27 16:29:45 UTC

            dl-layer-rmsnorm-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_LAYER_FFN/Scalar0.06540.065410,762
            diff --git a/site/deeplearning/dl-layer-selfattention-benchmark.html b/site/deeplearning/dl-layer-selfattention-benchmark.html index 1114f932..950b1d25 100644 --- a/site/deeplearning/dl-layer-selfattention-benchmark.html +++ b/site/deeplearning/dl-layer-selfattention-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-layer-selfattention-benchmark.json

            2025-07-27 15:40:04 UTC

            +

            deeplearning/dl-layer-selfattention-benchmark.json

            2025-07-27 16:29:45 UTC

            dl-layer-selfattention-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_LAYER_RMSNORM/Scalar0.001960.00196356,202
            diff --git a/site/deeplearning/dl-model-lenet-benchmark.html b/site/deeplearning/dl-model-lenet-benchmark.html index d6bed7ac..6f2fb5ea 100644 --- a/site/deeplearning/dl-model-lenet-benchmark.html +++ b/site/deeplearning/dl-model-lenet-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-model-lenet-benchmark.json

            2025-07-27 15:40:04 UTC

            +

            deeplearning/dl-model-lenet-benchmark.json

            2025-07-27 16:29:45 UTC

            dl-model-lenet-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_LAYER_ATTENTION/Scalar4.694.69149
            diff --git a/site/deeplearning/dl-model-mobilenetv3-benchmark.html b/site/deeplearning/dl-model-mobilenetv3-benchmark.html index 6feceebc..44c26dba 100644 --- a/site/deeplearning/dl-model-mobilenetv3-benchmark.html +++ b/site/deeplearning/dl-model-mobilenetv3-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-model-mobilenetv3-benchmark.json

            2025-07-27 15:40:04 UTC

            +

            deeplearning/dl-model-mobilenetv3-benchmark.json

            2025-07-27 16:29:45 UTC

            dl-model-mobilenetv3-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_MODEL_LENET/Auto_Vectorization0.1650.1654,304
            diff --git a/site/deeplearning/dl-model-resnet18-benchmark.html b/site/deeplearning/dl-model-resnet18-benchmark.html index 145ca157..5188e1e0 100644 --- a/site/deeplearning/dl-model-resnet18-benchmark.html +++ b/site/deeplearning/dl-model-resnet18-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-model-resnet18-benchmark.json

            2025-07-27 15:40:04 UTC

            +

            deeplearning/dl-model-resnet18-benchmark.json

            2025-07-27 16:29:45 UTC

            dl-model-resnet18-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_MobileNet_V3/BM_MobileNet_V3_scalar37.137.119
            diff --git a/site/deeplearning/dl-model-tinyllama-benchmark.html b/site/deeplearning/dl-model-tinyllama-benchmark.html index e4f70956..6029c71c 100644 --- a/site/deeplearning/dl-model-tinyllama-benchmark.html +++ b/site/deeplearning/dl-model-tinyllama-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-model-tinyllama-benchmark.json

            2025-07-27 15:40:04 UTC

            +

            deeplearning/dl-model-tinyllama-benchmark.json

            2025-07-27 16:29:45 UTC

            dl-model-tinyllama-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_MODEL_Resnet18/Auto_Vectorization7317231
            diff --git a/site/deeplearning/dl-model-whisper-benchmark.html b/site/deeplearning/dl-model-whisper-benchmark.html index e9f6ec48..fde782d9 100644 --- a/site/deeplearning/dl-model-whisper-benchmark.html +++ b/site/deeplearning/dl-model-whisper-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-model-whisper-benchmark.json

            2025-07-27 15:40:04 UTC

            +

            deeplearning/dl-model-whisper-benchmark.json

            2025-07-27 16:29:45 UTC

            dl-model-whisper-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_MODEL_TINYLLAMA/scalar1.39e+051.39e+051
            diff --git a/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html b/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html index 79eb0781..514b9d32 100644 --- a/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-arithaddf-benchmark.json

            2025-07-27 15:40:04 UTC

            +

            deeplearning/dl-op-linalg-arithaddf-benchmark.json

            2025-07-27 16:29:45 UTC

            dl-op-linalg-arithaddf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_MODEL_Whisper/Auto_Vectorization8e+048e+041
            diff --git a/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html b/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html index 8d7a823b..85bf1762 100644 --- a/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-arithdivf-benchmark.json

            2025-07-27 15:40:04 UTC

            +

            deeplearning/dl-op-linalg-arithdivf-benchmark.json

            2025-07-27 16:29:45 UTC

            dl-op-linalg-arithdivf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_ADDF_SCALAR0.02950.029523,451
            diff --git a/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html b/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html index 3c9cf63a..3c4555aa 100644 --- a/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-arithmulf-benchmark.json

            2025-07-27 15:40:04 UTC

            +

            deeplearning/dl-op-linalg-arithmulf-benchmark.json

            2025-07-27 16:29:45 UTC

            dl-op-linalg-arithmulf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_DIVF_SCALAR0.02980.029823,358
            diff --git a/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html b/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html index ae10799c..c528fa2f 100644 --- a/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-arithnegf-benchmark.json

            2025-07-27 15:40:04 UTC

            +

            deeplearning/dl-op-linalg-arithnegf-benchmark.json

            2025-07-27 16:29:45 UTC

            dl-op-linalg-arithnegf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_MULF_SCALAR0.02980.029823,441
            diff --git a/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html b/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html index f5519bb4..8f3dceee 100644 --- a/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-arithsubf-benchmark.json

            2025-07-27 15:40:04 UTC

            +

            deeplearning/dl-op-linalg-arithsubf-benchmark.json

            2025-07-27 16:29:45 UTC

            dl-op-linalg-arithsubf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_NEGF_SCALAR0.02250.022530,969
            diff --git a/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html b/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html index 737ed6aa..379e9d6c 100644 --- a/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html +++ b/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-batch-matmul-benchmark.json

            2025-07-27 15:40:04 UTC

            +

            deeplearning/dl-op-linalg-batch-matmul-benchmark.json

            2025-07-27 16:29:45 UTC

            dl-op-linalg-batch-matmul-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_SUBF_SCALAR0.02940.029423,509
            diff --git a/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html b/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html index 61da4097..cf233316 100644 --- a/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html +++ b/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.json

            2025-07-27 15:40:04 UTC

            +

            deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.json

            2025-07-27 16:29:45 UTC

            dl-op-linalg-conv2d-nchw-fchw-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_BATCH_MATMUL/Scalar/iterations:13.54e+033.54e+031
            diff --git a/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html b/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html index 367a8b74..fe9fcac0 100644 --- a/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html +++ b/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json

            2025-07-27 15:40:04 UTC

            +

            deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json

            2025-07-27 16:29:45 UTC

            dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_Conv2DNchwFchw_SCALAR2832832
            diff --git a/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html b/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html index a36c1b09..68333c60 100644 --- a/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json

            2025-07-27 15:40:04 UTC

            +

            deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json

            2025-07-27 16:29:45 UTC

            dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_CONV_2D_NHWC_FHWC/scalar/iterations:572.372.35
            diff --git a/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html b/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html index 9271eb04..03ce48b3 100644 --- a/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html +++ b/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json

            2025-07-27 15:40:04 UTC

            +

            deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json

            2025-07-27 16:29:45 UTC

            dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_CONV_2D_NHWC_HWCF_SCALAR32.332.322
            diff --git a/site/deeplearning/dl-op-linalg-mathexp-benchmark.html b/site/deeplearning/dl-op-linalg-mathexp-benchmark.html index f1bda132..81e9cf35 100644 --- a/site/deeplearning/dl-op-linalg-mathexp-benchmark.html +++ b/site/deeplearning/dl-op-linalg-mathexp-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-mathexp-benchmark.json

            2025-07-27 15:40:04 UTC

            +

            deeplearning/dl-op-linalg-mathexp-benchmark.json

            2025-07-27 16:29:45 UTC

            dl-op-linalg-mathexp-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/scalar/iterations:54.254.255
            diff --git a/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html b/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html index 8dc4b5ef..26501c17 100644 --- a/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html +++ b/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-mathfpow-benchmark.json

            2025-07-27 15:40:04 UTC

            +

            deeplearning/dl-op-linalg-mathfpow-benchmark.json

            2025-07-27 16:29:45 UTC

            dl-op-linalg-mathfpow-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_EXP_SCALAR0.04560.045615,225
            diff --git a/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html b/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html index 518b07c9..f40ef6c5 100644 --- a/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html +++ b/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-mathrsqrt-benchmark.json

            2025-07-27 15:40:04 UTC

            +

            deeplearning/dl-op-linalg-mathrsqrt-benchmark.json

            2025-07-27 16:29:45 UTC

            dl-op-linalg-mathrsqrt-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_FPOW_SCALAR0.08410.08418,255
            diff --git a/site/deeplearning/dl-op-linalg-matmul-benchmark.html b/site/deeplearning/dl-op-linalg-matmul-benchmark.html index e28828e8..85b55b1d 100644 --- a/site/deeplearning/dl-op-linalg-matmul-benchmark.html +++ b/site/deeplearning/dl-op-linalg-matmul-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-matmul-benchmark.json

            2025-07-27 15:40:04 UTC

            +

            deeplearning/dl-op-linalg-matmul-benchmark.json

            2025-07-27 16:29:45 UTC

            dl-op-linalg-matmul-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_RSQRT_SCALAR0.07280.07289,537
            diff --git a/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html b/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html index 93c6fc59..f2935f16 100644 --- a/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html +++ b/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.json

            2025-07-27 15:40:04 UTC

            +

            deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.json

            2025-07-27 16:29:45 UTC

            dl-op-linalg-pooling-nhwc-sum-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_MATMUL/scalar_O0/iterations:13.93e+033.93e+031
            diff --git a/site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html b/site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html index 2a08f572..67ddb348 100644 --- a/site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-reduceaddf-benchmark.json

            2025-07-27 15:40:04 UTC

            +

            deeplearning/dl-op-linalg-reduceaddf-benchmark.json

            2025-07-27 16:29:45 UTC

            ⚠ FAILED: JSON parse error: Expecting value
            Console output
            2025-07-27T14:27:39+00:00
            diff --git a/site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html b/site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html
            index 8ef6bf4c..0c8fb30a 100644
            --- a/site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html
            +++ b/site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html
            @@ -10,7 +10,7 @@
             .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem}
             
             
            -

            deeplearning/dl-op-linalg-reducemaxf-benchmark.json

            2025-07-27 15:40:04 UTC

            +

            deeplearning/dl-op-linalg-reducemaxf-benchmark.json

            2025-07-27 16:29:45 UTC

            ⚠ FAILED: JSON parse error: Expecting value
            Console output
            2025-07-27T14:27:39+00:00
            diff --git a/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html b/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html
            index 24312d18..71042b19 100644
            --- a/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html
            +++ b/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html
            @@ -10,7 +10,7 @@
             .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem}
             
             
            -

            deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.json

            2025-07-27 15:40:04 UTC

            +

            deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.json

            2025-07-27 16:29:45 UTC

            dl-op-linalg-softmax-exp-sum-div-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_POOLING_NHWC_SUM_SCALAR0.2330.2333,002
            diff --git a/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html b/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html index b3ac8835..626031c9 100644 --- a/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html +++ b/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-matmul-transpose-b-benchmark.json

            2025-07-27 15:40:04 UTC

            +

            deeplearning/dl-op-matmul-transpose-b-benchmark.json

            2025-07-27 16:29:45 UTC

            dl-op-matmul-transpose-b-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_SOFTMAXEXPSUMDIV_SCALAR0.005660.00566121,646
            diff --git a/site/deeplearning/dl-op-tosa-transpose-benchmark.html b/site/deeplearning/dl-op-tosa-transpose-benchmark.html index 8a00aa6d..11670ec0 100644 --- a/site/deeplearning/dl-op-tosa-transpose-benchmark.html +++ b/site/deeplearning/dl-op-tosa-transpose-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-tosa-transpose-benchmark.json

            2025-07-27 15:40:04 UTC

            +

            deeplearning/dl-op-tosa-transpose-benchmark.json

            2025-07-27 16:29:45 UTC

            dl-op-tosa-transpose-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_MATMUL_TRANSPOSE_B/scalar_O0/iterations:51.05e+031.05e+035
            diff --git a/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 51daafa4..e11e3916 100644 --- a/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 15:40:04 UTC

            +

            imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:29:45 UTC

            AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_TRANSPOSE_2D/scalar_O0/iterations:526.421.45
            diff --git a/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index fa5b4d1a..6b32a177 100644 --- a/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 15:40:04 UTC

            +

            imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:29:45 UTC

            AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.884.88143
            diff --git a/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 7f6a92e2..1fa2bea1 100644 --- a/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 15:40:04 UTC

            +

            imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:29:45 UTC

            AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.884.88143
            diff --git a/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 46272c0e..de31ff44 100644 --- a/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 15:40:04 UTC

            +

            imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:29:45 UTC

            AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.511.561
            diff --git a/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 9753d4b9..6ebea17a 100644 --- a/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 15:40:04 UTC

            +

            imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:29:45 UTC

            AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.611.661
            diff --git a/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 399e3c28..05aefe6f 100644 --- a/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 15:40:04 UTC

            +

            imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:29:45 UTC

            AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.874.87144
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 44bde052..2d2a6395 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 15:40:04 UTC

            +

            imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:29:45 UTC

            AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.94.9144
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 3ba8a292..5e87a2cf 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 15:40:04 UTC

            +

            imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:29:45 UTC

            AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.774.77147
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index e3656280..a502dc32 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 15:40:04 UTC

            +

            imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:29:45 UTC

            AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.914.91143
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index bc33494e..2043d94e 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 15:40:04 UTC

            +

            imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:29:45 UTC

            AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.611.660
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index ccb70e21..a037f1fd 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 15:40:04 UTC

            +

            imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:29:45 UTC

            AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.511.560
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 0532a137..fe39fd4c 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 15:40:04 UTC

            +

            imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:29:45 UTC

            AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/121.521.531
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 27f81737..34301d2c 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 15:40:04 UTC

            +

            imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:29:45 UTC

            AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/121.621.632
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index b3702bed..0694c7e2 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 15:40:04 UTC

            +

            imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:29:45 UTC

            AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/134.234.221
            diff --git a/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index bede4f4d..0c53d054 100644 --- a/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 15:40:04 UTC

            +

            imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:29:45 UTC

            SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/134.334.320
            diff --git a/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 864c9816..735f9675 100644 --- a/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 15:40:04 UTC

            +

            imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:29:45 UTC

            SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.744.74148
            diff --git a/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 8b667850..55cc260d 100644 --- a/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 15:40:04 UTC

            +

            imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:29:45 UTC

            SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.884.88144
            diff --git a/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 1f19bf32..c28f27aa 100644 --- a/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 15:40:04 UTC

            +

            imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:29:45 UTC

            SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.511.560
            diff --git a/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 83c6af19..19216401 100644 --- a/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 15:40:04 UTC

            +

            imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:29:45 UTC

            SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.611.661
            diff --git a/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 07a4fe48..44d7e05a 100644 --- a/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 15:40:04 UTC

            +

            imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:29:45 UTC

            SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/15.035.03139
            diff --git a/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index e1722229..82fe1ca6 100644 --- a/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 15:40:04 UTC

            +

            imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:29:45 UTC

            SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.894.89143
            diff --git a/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index c085a37f..f22dde31 100644 --- a/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 15:40:04 UTC

            +

            imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:29:45 UTC

            SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.894.89143
            diff --git a/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 21450047..992357a2 100644 --- a/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 15:40:04 UTC

            +

            imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:29:45 UTC

            SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.874.87144
            diff --git a/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index f74189cf..e93883ea 100644 --- a/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 15:40:04 UTC

            +

            imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:29:45 UTC

            SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.511.561
            diff --git a/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 14bb2f84..0f7dd01e 100644 --- a/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 15:40:04 UTC

            +

            imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:29:45 UTC

            SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.611.661
            diff --git a/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 79e9a174..5caf9924 100644 --- a/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 15:40:04 UTC

            +

            imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:29:45 UTC

            SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/121.621.631
            diff --git a/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index a6af2604..626a55a0 100644 --- a/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 15:40:04 UTC

            +

            imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:29:45 UTC

            SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/121.521.532
            diff --git a/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 675188a3..5f052384 100644 --- a/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 15:40:04 UTC

            +

            imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:29:45 UTC

            SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/141.541.517
            diff --git a/site/vectorization/vectorization_matrix.html b/site/vectorization/vectorization_matrix.html index 0759ca4d..f45d0ad3 100644 --- a/site/vectorization/vectorization_matrix.html +++ b/site/vectorization/vectorization_matrix.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            vectorization/vectorization_matrix.json

            2025-07-27 15:40:04 UTC

            +

            vectorization/vectorization_matrix.json

            2025-07-27 16:29:45 UTC

            vectorization_matrix.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/134.234.220
            From 2ae6fa062d1e53401be62989b4a3bb7606df171c Mon Sep 17 00:00:00 2001 From: LIUQyou <1343451020@qq.com> Date: Sun, 27 Jul 2025 16:42:25 +0000 Subject: [PATCH 32/52] test --- .github/workflows/bench.yml | 14 +++++----- site/benchmarks/2025-07-27/index.html | 27 +++++++++---------- site/deeplearning/dl-layer-ffn-benchmark.html | 2 +- .../dl-layer-rmsnorm-benchmark.html | 2 +- .../dl-layer-selfattention-benchmark.html | 2 +- .../dl-model-lenet-benchmark.html | 2 +- .../dl-model-mobilenetv3-benchmark.html | 2 +- .../dl-model-resnet18-benchmark.html | 2 +- .../dl-model-tinyllama-benchmark.html | 2 +- .../dl-model-whisper-benchmark.html | 2 +- .../dl-op-linalg-arithaddf-benchmark.html | 2 +- .../dl-op-linalg-arithdivf-benchmark.html | 2 +- .../dl-op-linalg-arithmulf-benchmark.html | 2 +- .../dl-op-linalg-arithnegf-benchmark.html | 2 +- .../dl-op-linalg-arithsubf-benchmark.html | 2 +- .../dl-op-linalg-batch-matmul-benchmark.html | 2 +- ...-op-linalg-conv2d-nchw-fchw-benchmark.html | 2 +- ...-op-linalg-conv2d-nhwc-fhwc-benchmark.html | 2 +- ...-op-linalg-conv2d-nhwc-hwcf-benchmark.html | 2 +- ...-depthwise-conv-2d-nhwc-hwc-benchmark.html | 2 +- .../dl-op-linalg-mathexp-benchmark.html | 2 +- .../dl-op-linalg-mathfpow-benchmark.html | 2 +- .../dl-op-linalg-mathrsqrt-benchmark.html | 2 +- .../dl-op-linalg-matmul-benchmark.html | 2 +- ...-op-linalg-pooling-nhwc-sum-benchmark.html | 2 +- .../dl-op-linalg-reduceaddf-benchmark.html | 2 +- .../dl-op-linalg-reducemaxf-benchmark.html | 2 +- ...-linalg-softmax-exp-sum-div-benchmark.html | 2 +- .../dl-op-matmul-transpose-b-benchmark.html | 2 +- .../dl-op-tosa-transpose-benchmark.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- 
...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- site/vectorization/vectorization_matrix.html | 2 +- 59 files changed, 78 insertions(+), 77 deletions(-) diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml index 1e7608df..bfa44764 100755 --- a/.github/workflows/bench.yml +++ b/.github/workflows/bench.yml @@ -40,8 +40,10 @@ jobs: - name: Set BENCH_DATE and BENCH_DIR run: | bench_date=$(date +'%Y-%m-%d') - echo "BENCH_DATE=$bench_date" >> "$GITHUB_ENV" - echo "BENCH_DIR=/home/quliu/buddy-complier-workspace/buddy-benchmark/site/benchmarks/$bench_date" >> "$GITHUB_ENV" + echo "BENCH_DATE=$bench_date" \ + >> "$GITHUB_ENV" + echo "BENCH_DIR=$HOME/buddy-complier-workspace/buddy-benchmark/site/benchmarks/$bench_date" \ + >> "$GITHUB_ENV" # ------------------------------------------------------------ # 3-5) (UNCHANGED) upload raw logs, convert to HTML, deploy 
Pages @@ -81,10 +83,10 @@ jobs: with: path: /home/quliu/buddy-complier-workspace/buddy-benchmark/site - - name: Build index.html for this run + - name: Build landing page for this run run: | run_root="${{ env.BENCH_DIR }}" - mkdir -p "$run_root" # just in case + mkdir -p "$run_root" cat > "$run_root/index.html" <<'EOF' --- layout: default @@ -92,9 +94,9 @@ jobs: ---

            Benchmark results

              + {% assign here = page.dir %} {% for f in site.static_files %} - {% if f.path contains page.url and f.extname == ".html" - and f.name != "index.html" %} + {% if f.path startswith here and f.extname == ".html" and f.name != "index.html" %}
            • {{ f.name }}
            • {% endif %} {% endfor %} diff --git a/site/benchmarks/2025-07-27/index.html b/site/benchmarks/2025-07-27/index.html index 1641d47e..fdc9cf41 100644 --- a/site/benchmarks/2025-07-27/index.html +++ b/site/benchmarks/2025-07-27/index.html @@ -1,14 +1,13 @@ - - -

              Buddy-Benchmark results

                - -
              \ No newline at end of file +--- +layout: default +title: Benchmark run +--- +

              Benchmark results

              +
                +{% for f in site.static_files %} + {% if f.path contains page.url and f.extname == ".html" + and f.name != "index.html" %} +
              • {{ f.name }}
              • + {% endif %} +{% endfor %} +
              diff --git a/site/deeplearning/dl-layer-ffn-benchmark.html b/site/deeplearning/dl-layer-ffn-benchmark.html index fc2f34b1..b7ea05dc 100644 --- a/site/deeplearning/dl-layer-ffn-benchmark.html +++ b/site/deeplearning/dl-layer-ffn-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

              deeplearning/dl-layer-ffn-benchmark.json

              2025-07-27 16:29:45 UTC

              +

              deeplearning/dl-layer-ffn-benchmark.json

              2025-07-27 16:34:20 UTC

              dl-layer-ffn-benchmark.json

            NameTime (ns)CPU (ns)Iterations
            MLIR_MatMul/118.818.837,302,822
            diff --git a/site/deeplearning/dl-layer-rmsnorm-benchmark.html b/site/deeplearning/dl-layer-rmsnorm-benchmark.html index 2eb5872f..00a23f89 100644 --- a/site/deeplearning/dl-layer-rmsnorm-benchmark.html +++ b/site/deeplearning/dl-layer-rmsnorm-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-layer-rmsnorm-benchmark.json

            2025-07-27 16:29:45 UTC

            +

            deeplearning/dl-layer-rmsnorm-benchmark.json

            2025-07-27 16:34:20 UTC

            dl-layer-rmsnorm-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_LAYER_FFN/Scalar0.06540.065410,762
            diff --git a/site/deeplearning/dl-layer-selfattention-benchmark.html b/site/deeplearning/dl-layer-selfattention-benchmark.html index 950b1d25..666b88e9 100644 --- a/site/deeplearning/dl-layer-selfattention-benchmark.html +++ b/site/deeplearning/dl-layer-selfattention-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-layer-selfattention-benchmark.json

            2025-07-27 16:29:45 UTC

            +

            deeplearning/dl-layer-selfattention-benchmark.json

            2025-07-27 16:34:20 UTC

            dl-layer-selfattention-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_LAYER_RMSNORM/Scalar0.001960.00196356,202
            diff --git a/site/deeplearning/dl-model-lenet-benchmark.html b/site/deeplearning/dl-model-lenet-benchmark.html index 6f2fb5ea..05705da7 100644 --- a/site/deeplearning/dl-model-lenet-benchmark.html +++ b/site/deeplearning/dl-model-lenet-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-model-lenet-benchmark.json

            2025-07-27 16:29:45 UTC

            +

            deeplearning/dl-model-lenet-benchmark.json

            2025-07-27 16:34:20 UTC

            dl-model-lenet-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_LAYER_ATTENTION/Scalar4.694.69149
            diff --git a/site/deeplearning/dl-model-mobilenetv3-benchmark.html b/site/deeplearning/dl-model-mobilenetv3-benchmark.html index 44c26dba..10b93e27 100644 --- a/site/deeplearning/dl-model-mobilenetv3-benchmark.html +++ b/site/deeplearning/dl-model-mobilenetv3-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-model-mobilenetv3-benchmark.json

            2025-07-27 16:29:45 UTC

            +

            deeplearning/dl-model-mobilenetv3-benchmark.json

            2025-07-27 16:34:20 UTC

            dl-model-mobilenetv3-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_MODEL_LENET/Auto_Vectorization0.1650.1654,304
            diff --git a/site/deeplearning/dl-model-resnet18-benchmark.html b/site/deeplearning/dl-model-resnet18-benchmark.html index 5188e1e0..1b787eb8 100644 --- a/site/deeplearning/dl-model-resnet18-benchmark.html +++ b/site/deeplearning/dl-model-resnet18-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-model-resnet18-benchmark.json

            2025-07-27 16:29:45 UTC

            +

            deeplearning/dl-model-resnet18-benchmark.json

            2025-07-27 16:34:20 UTC

            dl-model-resnet18-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_MobileNet_V3/BM_MobileNet_V3_scalar37.137.119
            diff --git a/site/deeplearning/dl-model-tinyllama-benchmark.html b/site/deeplearning/dl-model-tinyllama-benchmark.html index 6029c71c..395d719a 100644 --- a/site/deeplearning/dl-model-tinyllama-benchmark.html +++ b/site/deeplearning/dl-model-tinyllama-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-model-tinyllama-benchmark.json

            2025-07-27 16:29:45 UTC

            +

            deeplearning/dl-model-tinyllama-benchmark.json

            2025-07-27 16:34:20 UTC

            dl-model-tinyllama-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_MODEL_Resnet18/Auto_Vectorization7317231
            diff --git a/site/deeplearning/dl-model-whisper-benchmark.html b/site/deeplearning/dl-model-whisper-benchmark.html index fde782d9..80a1caf1 100644 --- a/site/deeplearning/dl-model-whisper-benchmark.html +++ b/site/deeplearning/dl-model-whisper-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-model-whisper-benchmark.json

            2025-07-27 16:29:45 UTC

            +

            deeplearning/dl-model-whisper-benchmark.json

            2025-07-27 16:34:20 UTC

            dl-model-whisper-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_MODEL_TINYLLAMA/scalar1.39e+051.39e+051
            diff --git a/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html b/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html index 514b9d32..2e15133c 100644 --- a/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-arithaddf-benchmark.json

            2025-07-27 16:29:45 UTC

            +

            deeplearning/dl-op-linalg-arithaddf-benchmark.json

            2025-07-27 16:34:20 UTC

            dl-op-linalg-arithaddf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_MODEL_Whisper/Auto_Vectorization8e+048e+041
            diff --git a/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html b/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html index 85bf1762..6f37c0ff 100644 --- a/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-arithdivf-benchmark.json

            2025-07-27 16:29:45 UTC

            +

            deeplearning/dl-op-linalg-arithdivf-benchmark.json

            2025-07-27 16:34:20 UTC

            dl-op-linalg-arithdivf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_ADDF_SCALAR0.02950.029523,451
            diff --git a/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html b/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html index 3c4555aa..51f698d7 100644 --- a/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-arithmulf-benchmark.json

            2025-07-27 16:29:45 UTC

            +

            deeplearning/dl-op-linalg-arithmulf-benchmark.json

            2025-07-27 16:34:20 UTC

            dl-op-linalg-arithmulf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_DIVF_SCALAR0.02980.029823,358
            diff --git a/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html b/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html index c528fa2f..ef56ffef 100644 --- a/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-arithnegf-benchmark.json

            2025-07-27 16:29:45 UTC

            +

            deeplearning/dl-op-linalg-arithnegf-benchmark.json

            2025-07-27 16:34:20 UTC

            dl-op-linalg-arithnegf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_MULF_SCALAR0.02980.029823,441
            diff --git a/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html b/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html index 8f3dceee..d7e5032e 100644 --- a/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-arithsubf-benchmark.json

            2025-07-27 16:29:45 UTC

            +

            deeplearning/dl-op-linalg-arithsubf-benchmark.json

            2025-07-27 16:34:20 UTC

            dl-op-linalg-arithsubf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_NEGF_SCALAR0.02250.022530,969
            diff --git a/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html b/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html index 379e9d6c..f5e16002 100644 --- a/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html +++ b/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-batch-matmul-benchmark.json

            2025-07-27 16:29:45 UTC

            +

            deeplearning/dl-op-linalg-batch-matmul-benchmark.json

            2025-07-27 16:34:20 UTC

            dl-op-linalg-batch-matmul-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_SUBF_SCALAR0.02940.029423,509
            diff --git a/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html b/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html index cf233316..840bea42 100644 --- a/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html +++ b/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.json

            2025-07-27 16:29:45 UTC

            +

            deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.json

            2025-07-27 16:34:20 UTC

            dl-op-linalg-conv2d-nchw-fchw-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_BATCH_MATMUL/Scalar/iterations:13.54e+033.54e+031
            diff --git a/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html b/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html index fe9fcac0..75152d88 100644 --- a/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html +++ b/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json

            2025-07-27 16:29:45 UTC

            +

            deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json

            2025-07-27 16:34:20 UTC

            dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_Conv2DNchwFchw_SCALAR2832832
            diff --git a/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html b/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html index 68333c60..0bc8c359 100644 --- a/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json

            2025-07-27 16:29:45 UTC

            +

            deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json

            2025-07-27 16:34:20 UTC

            dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_CONV_2D_NHWC_FHWC/scalar/iterations:572.372.35
            diff --git a/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html b/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html index 03ce48b3..ad664ddc 100644 --- a/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html +++ b/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json

            2025-07-27 16:29:45 UTC

            +

            deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json

            2025-07-27 16:34:20 UTC

            dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_CONV_2D_NHWC_HWCF_SCALAR32.332.322
            diff --git a/site/deeplearning/dl-op-linalg-mathexp-benchmark.html b/site/deeplearning/dl-op-linalg-mathexp-benchmark.html index 81e9cf35..3851a1f2 100644 --- a/site/deeplearning/dl-op-linalg-mathexp-benchmark.html +++ b/site/deeplearning/dl-op-linalg-mathexp-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-mathexp-benchmark.json

            2025-07-27 16:29:45 UTC

            +

            deeplearning/dl-op-linalg-mathexp-benchmark.json

            2025-07-27 16:34:20 UTC

            dl-op-linalg-mathexp-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/scalar/iterations:54.254.255
            diff --git a/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html b/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html index 26501c17..f09dea3a 100644 --- a/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html +++ b/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-mathfpow-benchmark.json

            2025-07-27 16:29:45 UTC

            +

            deeplearning/dl-op-linalg-mathfpow-benchmark.json

            2025-07-27 16:34:20 UTC

            dl-op-linalg-mathfpow-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_EXP_SCALAR0.04560.045615,225
            diff --git a/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html b/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html index f40ef6c5..0b0192b3 100644 --- a/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html +++ b/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-mathrsqrt-benchmark.json

            2025-07-27 16:29:45 UTC

            +

            deeplearning/dl-op-linalg-mathrsqrt-benchmark.json

            2025-07-27 16:34:20 UTC

            dl-op-linalg-mathrsqrt-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_FPOW_SCALAR0.08410.08418,255
            diff --git a/site/deeplearning/dl-op-linalg-matmul-benchmark.html b/site/deeplearning/dl-op-linalg-matmul-benchmark.html index 85b55b1d..7b53c4dd 100644 --- a/site/deeplearning/dl-op-linalg-matmul-benchmark.html +++ b/site/deeplearning/dl-op-linalg-matmul-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-matmul-benchmark.json

            2025-07-27 16:29:45 UTC

            +

            deeplearning/dl-op-linalg-matmul-benchmark.json

            2025-07-27 16:34:20 UTC

            dl-op-linalg-matmul-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_RSQRT_SCALAR0.07280.07289,537
            diff --git a/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html b/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html index f2935f16..641b2a1a 100644 --- a/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html +++ b/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.json

            2025-07-27 16:29:45 UTC

            +

            deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.json

            2025-07-27 16:34:20 UTC

            dl-op-linalg-pooling-nhwc-sum-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_MATMUL/scalar_O0/iterations:13.93e+033.93e+031
            diff --git a/site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html b/site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html index 67ddb348..0f501c62 100644 --- a/site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-reduceaddf-benchmark.json

            2025-07-27 16:29:45 UTC

            +

            deeplearning/dl-op-linalg-reduceaddf-benchmark.json

            2025-07-27 16:34:20 UTC

            ⚠ FAILED: JSON parse error: Expecting value
            Console output
            2025-07-27T14:27:39+00:00
            diff --git a/site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html b/site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html
            index 0c8fb30a..b4c3af92 100644
            --- a/site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html
            +++ b/site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html
            @@ -10,7 +10,7 @@
             .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem}
             
             
            -

            deeplearning/dl-op-linalg-reducemaxf-benchmark.json

            2025-07-27 16:29:45 UTC

            +

            deeplearning/dl-op-linalg-reducemaxf-benchmark.json

            2025-07-27 16:34:20 UTC

            ⚠ FAILED: JSON parse error: Expecting value
            Console output
            2025-07-27T14:27:39+00:00
            diff --git a/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html b/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html
            index 71042b19..37c67f15 100644
            --- a/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html
            +++ b/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html
            @@ -10,7 +10,7 @@
             .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem}
             
             
            -

            deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.json

            2025-07-27 16:29:45 UTC

            +

            deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.json

            2025-07-27 16:34:20 UTC

            dl-op-linalg-softmax-exp-sum-div-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_POOLING_NHWC_SUM_SCALAR0.2330.2333,002
            diff --git a/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html b/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html index 626031c9..1bff71df 100644 --- a/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html +++ b/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-matmul-transpose-b-benchmark.json

            2025-07-27 16:29:45 UTC

            +

            deeplearning/dl-op-matmul-transpose-b-benchmark.json

            2025-07-27 16:34:20 UTC

            dl-op-matmul-transpose-b-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_SOFTMAXEXPSUMDIV_SCALAR0.005660.00566121,646
            diff --git a/site/deeplearning/dl-op-tosa-transpose-benchmark.html b/site/deeplearning/dl-op-tosa-transpose-benchmark.html index 11670ec0..84a4c5ed 100644 --- a/site/deeplearning/dl-op-tosa-transpose-benchmark.html +++ b/site/deeplearning/dl-op-tosa-transpose-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-tosa-transpose-benchmark.json

            2025-07-27 16:29:45 UTC

            +

            deeplearning/dl-op-tosa-transpose-benchmark.json

            2025-07-27 16:34:20 UTC

            dl-op-tosa-transpose-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_MATMUL_TRANSPOSE_B/scalar_O0/iterations:51.05e+031.05e+035
            diff --git a/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index e11e3916..d4aed250 100644 --- a/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:29:45 UTC

            +

            imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:34:20 UTC

            AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_TRANSPOSE_2D/scalar_O0/iterations:526.421.45
            diff --git a/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 6b32a177..8c9ae38f 100644 --- a/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:29:45 UTC

            +

            imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:34:20 UTC

            AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.884.88143
            diff --git a/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 1fa2bea1..7fd65fc1 100644 --- a/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:29:45 UTC

            +

            imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:34:20 UTC

            AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.884.88143
            diff --git a/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index de31ff44..c676577a 100644 --- a/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:29:45 UTC

            +

            imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:34:20 UTC

            AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.511.561
            diff --git a/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 6ebea17a..b1917d3d 100644 --- a/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:29:45 UTC

            +

            imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:34:20 UTC

            AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.611.661
            diff --git a/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 05aefe6f..4a617939 100644 --- a/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:29:45 UTC

            +

            imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:34:20 UTC

            AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.874.87144
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 2d2a6395..011ea893 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:29:45 UTC

            +

            imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:34:20 UTC

            AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.94.9144
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 5e87a2cf..52f010c6 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:29:45 UTC

            +

            imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:34:20 UTC

            AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.774.77147
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index a502dc32..2d3346e9 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:29:45 UTC

            +

            imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:34:20 UTC

            AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.914.91143
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 2043d94e..43159c79 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:29:45 UTC

            +

            imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:34:20 UTC

            AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.611.660
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index a037f1fd..9568b379 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:29:45 UTC

            +

            imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:34:20 UTC

            AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.511.560
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index fe39fd4c..ae030375 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:29:45 UTC

            +

            imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:34:20 UTC

            AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/121.521.531
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 34301d2c..13b04940 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:29:45 UTC

            +

            imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:34:20 UTC

            AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/121.621.632
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 0694c7e2..406306c1 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:29:45 UTC

            +

            imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:34:20 UTC

            AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/134.234.221
            diff --git a/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 0c53d054..e111f003 100644 --- a/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:29:45 UTC

            +

            imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:34:20 UTC

            SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/134.334.320
            diff --git a/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 735f9675..10c408f2 100644 --- a/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:29:45 UTC

            +

            imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:34:20 UTC

            SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.744.74148
            diff --git a/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 55cc260d..af5c7e04 100644 --- a/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:29:45 UTC

            +

            imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:34:20 UTC

            SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.884.88144
            diff --git a/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index c28f27aa..e5f18b08 100644 --- a/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:29:45 UTC

            +

            imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:34:20 UTC

            SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.511.560
            diff --git a/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 19216401..0328281d 100644 --- a/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:29:45 UTC

            +

            imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:34:20 UTC

            SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.611.661
            diff --git a/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 44d7e05a..acff61c9 100644 --- a/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:29:45 UTC

            +

            imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:34:20 UTC

            SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/15.035.03139
            diff --git a/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 82fe1ca6..035eb658 100644 --- a/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:29:45 UTC

            +

            imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:34:20 UTC

            SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.894.89143
            diff --git a/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index f22dde31..ab06d2bd 100644 --- a/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:29:45 UTC

            +

            imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:34:20 UTC

            SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.894.89143
            diff --git a/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 992357a2..6fd53811 100644 --- a/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:29:45 UTC

            +

            imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:34:20 UTC

            SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.874.87144
            diff --git a/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index e93883ea..806bb083 100644 --- a/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:29:45 UTC

            +

            imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:34:20 UTC

            SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.511.561
            diff --git a/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 0f7dd01e..5e7443e7 100644 --- a/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:29:45 UTC

            +

            imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:34:20 UTC

            SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.611.661
            diff --git a/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 5caf9924..24061274 100644 --- a/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:29:45 UTC

            +

            imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:34:20 UTC

            SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/121.621.631
            diff --git a/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 626a55a0..b7ead294 100644 --- a/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:29:45 UTC

            +

            imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:34:20 UTC

            SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/121.521.532
            diff --git a/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 5f052384..837e0f54 100644 --- a/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:29:45 UTC

            +

            imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:34:20 UTC

            SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/141.541.517
            diff --git a/site/vectorization/vectorization_matrix.html b/site/vectorization/vectorization_matrix.html index f45d0ad3..f70f7f8a 100644 --- a/site/vectorization/vectorization_matrix.html +++ b/site/vectorization/vectorization_matrix.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            vectorization/vectorization_matrix.json

            2025-07-27 16:29:45 UTC

            +

            vectorization/vectorization_matrix.json

            2025-07-27 16:34:20 UTC

            vectorization_matrix.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/134.234.220
            From 73f885aeb6d902b5a39449ea064c564b6fe9e5d0 Mon Sep 17 00:00:00 2001 From: LIUQyou <1343451020@qq.com> Date: Sun, 27 Jul 2025 16:50:18 +0000 Subject: [PATCH 33/52] test --- .github/workflows/bench.yml | 18 +++++++++--------- site/benchmarks/2025-07-27/index.html | 4 ++-- site/deeplearning/dl-layer-ffn-benchmark.html | 2 +- .../dl-layer-rmsnorm-benchmark.html | 2 +- .../dl-layer-selfattention-benchmark.html | 2 +- .../deeplearning/dl-model-lenet-benchmark.html | 2 +- .../dl-model-mobilenetv3-benchmark.html | 2 +- .../dl-model-resnet18-benchmark.html | 2 +- .../dl-model-tinyllama-benchmark.html | 2 +- .../dl-model-whisper-benchmark.html | 2 +- .../dl-op-linalg-arithaddf-benchmark.html | 2 +- .../dl-op-linalg-arithdivf-benchmark.html | 2 +- .../dl-op-linalg-arithmulf-benchmark.html | 2 +- .../dl-op-linalg-arithnegf-benchmark.html | 2 +- .../dl-op-linalg-arithsubf-benchmark.html | 2 +- .../dl-op-linalg-batch-matmul-benchmark.html | 2 +- ...l-op-linalg-conv2d-nchw-fchw-benchmark.html | 2 +- ...l-op-linalg-conv2d-nhwc-fhwc-benchmark.html | 2 +- ...l-op-linalg-conv2d-nhwc-hwcf-benchmark.html | 2 +- ...g-depthwise-conv-2d-nhwc-hwc-benchmark.html | 2 +- .../dl-op-linalg-mathexp-benchmark.html | 2 +- .../dl-op-linalg-mathfpow-benchmark.html | 2 +- .../dl-op-linalg-mathrsqrt-benchmark.html | 2 +- .../dl-op-linalg-matmul-benchmark.html | 2 +- ...l-op-linalg-pooling-nhwc-sum-benchmark.html | 2 +- .../dl-op-linalg-reduceaddf-benchmark.html | 2 +- .../dl-op-linalg-reducemaxf-benchmark.html | 2 +- ...p-linalg-softmax-exp-sum-div-benchmark.html | 2 +- .../dl-op-matmul-transpose-b-benchmark.html | 2 +- .../dl-op-tosa-transpose-benchmark.html | 2 +- ...ndom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...dom3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...ndom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...dom3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...ndom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...dom3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- 
...ndom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...dom3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...ndom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...dom3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...ndom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...dom3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...ndom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...dom3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...ndom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...dom3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...ndom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...dom3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...ndom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...dom3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...ndom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...dom3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...ndom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...dom3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...ndom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...dom3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...ndom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...dom3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- site/vectorization/vectorization_matrix.html | 2 +- 59 files changed, 68 insertions(+), 68 deletions(-) diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml index bfa44764..a88660fd 100755 --- a/.github/workflows/bench.yml +++ b/.github/workflows/bench.yml @@ -40,10 +40,8 @@ jobs: - name: Set BENCH_DATE and BENCH_DIR run: | bench_date=$(date +'%Y-%m-%d') - echo "BENCH_DATE=$bench_date" \ - >> "$GITHUB_ENV" - echo "BENCH_DIR=$HOME/buddy-complier-workspace/buddy-benchmark/site/benchmarks/$bench_date" \ - >> "$GITHUB_ENV" + echo "BENCH_DATE=$bench_date" >> "$GITHUB_ENV" + echo "BENCH_DIR=$HOME/buddy-complier-workspace/buddy-benchmark/site/benchmarks/$bench_date" >> "$GITHUB_ENV" # ------------------------------------------------------------ # 3-5) (UNCHANGED) upload raw logs, convert 
to HTML, deploy Pages @@ -87,31 +85,33 @@ jobs: run: | run_root="${{ env.BENCH_DIR }}" mkdir -p "$run_root" + cat > "$run_root/index.html" <<'EOF' --- layout: default title: Benchmark run --- +

            Benchmark results

            +
              - {% assign here = page.dir %} {% for f in site.static_files %} - {% if f.path startswith here and f.extname == ".html" and f.name != "index.html" %} + {% if f.path contains page.dir and f.name != "index.html" and f.extname == ".html" %}
            • {{ f.name }}
            • {% endif %} {% endfor %}
            EOF + - name: Push benchmark results uses: peaceiris/actions-gh-pages@v4 with: personal_token: ${{ secrets.BUDDY_SITE_PAT }} external_repository: buddy-compiler/buddy-compiler.github.io - publish_dir: "${{ env.BENCH_DIR }}" # the directory we just filled + publish_dir: "${{ env.BENCH_DIR }}" # this now holds index.html + reports destination_dir: benchmarks/${{ github.sha }} publish_branch: master keep_files: true - enable_jekyll: true # *no* .nojekyll - commit_message: "Deploy benchmark results for ${{ github.sha }}" + enable_jekyll: true diff --git a/site/benchmarks/2025-07-27/index.html b/site/benchmarks/2025-07-27/index.html index fdc9cf41..4b22d903 100644 --- a/site/benchmarks/2025-07-27/index.html +++ b/site/benchmarks/2025-07-27/index.html @@ -4,9 +4,9 @@ ---

            Benchmark results

              +{% assign here = page.dir %} {% for f in site.static_files %} - {% if f.path contains page.url and f.extname == ".html" - and f.name != "index.html" %} + {% if f.path startswith here and f.extname == ".html" and f.name != "index.html" %}
            • {{ f.name }}
            • {% endif %} {% endfor %} diff --git a/site/deeplearning/dl-layer-ffn-benchmark.html b/site/deeplearning/dl-layer-ffn-benchmark.html index b7ea05dc..94080724 100644 --- a/site/deeplearning/dl-layer-ffn-benchmark.html +++ b/site/deeplearning/dl-layer-ffn-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

              deeplearning/dl-layer-ffn-benchmark.json

              2025-07-27 16:34:20 UTC

              +

              deeplearning/dl-layer-ffn-benchmark.json

              2025-07-27 16:42:50 UTC

              dl-layer-ffn-benchmark.json

            NameTime (ns)CPU (ns)Iterations
            MLIR_MatMul/118.818.837,302,822
            diff --git a/site/deeplearning/dl-layer-rmsnorm-benchmark.html b/site/deeplearning/dl-layer-rmsnorm-benchmark.html index 00a23f89..028d8812 100644 --- a/site/deeplearning/dl-layer-rmsnorm-benchmark.html +++ b/site/deeplearning/dl-layer-rmsnorm-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-layer-rmsnorm-benchmark.json

            2025-07-27 16:34:20 UTC

            +

            deeplearning/dl-layer-rmsnorm-benchmark.json

            2025-07-27 16:42:50 UTC

            dl-layer-rmsnorm-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_LAYER_FFN/Scalar0.06540.065410,762
            diff --git a/site/deeplearning/dl-layer-selfattention-benchmark.html b/site/deeplearning/dl-layer-selfattention-benchmark.html index 666b88e9..b0094968 100644 --- a/site/deeplearning/dl-layer-selfattention-benchmark.html +++ b/site/deeplearning/dl-layer-selfattention-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-layer-selfattention-benchmark.json

            2025-07-27 16:34:20 UTC

            +

            deeplearning/dl-layer-selfattention-benchmark.json

            2025-07-27 16:42:50 UTC

            dl-layer-selfattention-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_LAYER_RMSNORM/Scalar0.001960.00196356,202
            diff --git a/site/deeplearning/dl-model-lenet-benchmark.html b/site/deeplearning/dl-model-lenet-benchmark.html index 05705da7..3fd96456 100644 --- a/site/deeplearning/dl-model-lenet-benchmark.html +++ b/site/deeplearning/dl-model-lenet-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-model-lenet-benchmark.json

            2025-07-27 16:34:20 UTC

            +

            deeplearning/dl-model-lenet-benchmark.json

            2025-07-27 16:42:50 UTC

            dl-model-lenet-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_LAYER_ATTENTION/Scalar4.694.69149
            diff --git a/site/deeplearning/dl-model-mobilenetv3-benchmark.html b/site/deeplearning/dl-model-mobilenetv3-benchmark.html index 10b93e27..f46c8ab9 100644 --- a/site/deeplearning/dl-model-mobilenetv3-benchmark.html +++ b/site/deeplearning/dl-model-mobilenetv3-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-model-mobilenetv3-benchmark.json

            2025-07-27 16:34:20 UTC

            +

            deeplearning/dl-model-mobilenetv3-benchmark.json

            2025-07-27 16:42:50 UTC

            dl-model-mobilenetv3-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_MODEL_LENET/Auto_Vectorization0.1650.1654,304
            diff --git a/site/deeplearning/dl-model-resnet18-benchmark.html b/site/deeplearning/dl-model-resnet18-benchmark.html index 1b787eb8..448c20d2 100644 --- a/site/deeplearning/dl-model-resnet18-benchmark.html +++ b/site/deeplearning/dl-model-resnet18-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-model-resnet18-benchmark.json

            2025-07-27 16:34:20 UTC

            +

            deeplearning/dl-model-resnet18-benchmark.json

            2025-07-27 16:42:50 UTC

            dl-model-resnet18-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_MobileNet_V3/BM_MobileNet_V3_scalar37.137.119
            diff --git a/site/deeplearning/dl-model-tinyllama-benchmark.html b/site/deeplearning/dl-model-tinyllama-benchmark.html index 395d719a..36ebc848 100644 --- a/site/deeplearning/dl-model-tinyllama-benchmark.html +++ b/site/deeplearning/dl-model-tinyllama-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-model-tinyllama-benchmark.json

            2025-07-27 16:34:20 UTC

            +

            deeplearning/dl-model-tinyllama-benchmark.json

            2025-07-27 16:42:50 UTC

            dl-model-tinyllama-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_MODEL_Resnet18/Auto_Vectorization7317231
            diff --git a/site/deeplearning/dl-model-whisper-benchmark.html b/site/deeplearning/dl-model-whisper-benchmark.html index 80a1caf1..c08b77cc 100644 --- a/site/deeplearning/dl-model-whisper-benchmark.html +++ b/site/deeplearning/dl-model-whisper-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-model-whisper-benchmark.json

            2025-07-27 16:34:20 UTC

            +

            deeplearning/dl-model-whisper-benchmark.json

            2025-07-27 16:42:50 UTC

            dl-model-whisper-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_MODEL_TINYLLAMA/scalar1.39e+051.39e+051
            diff --git a/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html b/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html index 2e15133c..f0b551e3 100644 --- a/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-arithaddf-benchmark.json

            2025-07-27 16:34:20 UTC

            +

            deeplearning/dl-op-linalg-arithaddf-benchmark.json

            2025-07-27 16:42:50 UTC

            dl-op-linalg-arithaddf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_MODEL_Whisper/Auto_Vectorization8e+048e+041
            diff --git a/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html b/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html index 6f37c0ff..2228a54a 100644 --- a/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-arithdivf-benchmark.json

            2025-07-27 16:34:20 UTC

            +

            deeplearning/dl-op-linalg-arithdivf-benchmark.json

            2025-07-27 16:42:50 UTC

            dl-op-linalg-arithdivf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_ADDF_SCALAR0.02950.029523,451
            diff --git a/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html b/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html index 51f698d7..53c66143 100644 --- a/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-arithmulf-benchmark.json

            2025-07-27 16:34:20 UTC

            +

            deeplearning/dl-op-linalg-arithmulf-benchmark.json

            2025-07-27 16:42:50 UTC

            dl-op-linalg-arithmulf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_DIVF_SCALAR0.02980.029823,358
            diff --git a/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html b/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html index ef56ffef..b0831594 100644 --- a/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-arithnegf-benchmark.json

            2025-07-27 16:34:20 UTC

            +

            deeplearning/dl-op-linalg-arithnegf-benchmark.json

            2025-07-27 16:42:50 UTC

            dl-op-linalg-arithnegf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_MULF_SCALAR0.02980.029823,441
            diff --git a/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html b/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html index d7e5032e..1ff1231f 100644 --- a/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-arithsubf-benchmark.json

            2025-07-27 16:34:20 UTC

            +

            deeplearning/dl-op-linalg-arithsubf-benchmark.json

            2025-07-27 16:42:50 UTC

            dl-op-linalg-arithsubf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_NEGF_SCALAR0.02250.022530,969
            diff --git a/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html b/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html index f5e16002..c095bd98 100644 --- a/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html +++ b/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-batch-matmul-benchmark.json

            2025-07-27 16:34:20 UTC

            +

            deeplearning/dl-op-linalg-batch-matmul-benchmark.json

            2025-07-27 16:42:50 UTC

            dl-op-linalg-batch-matmul-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_SUBF_SCALAR0.02940.029423,509
            diff --git a/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html b/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html index 840bea42..b818cdfe 100644 --- a/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html +++ b/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.json

            2025-07-27 16:34:20 UTC

            +

            deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.json

            2025-07-27 16:42:50 UTC

            dl-op-linalg-conv2d-nchw-fchw-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_BATCH_MATMUL/Scalar/iterations:13.54e+033.54e+031
            diff --git a/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html b/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html index 75152d88..945d0864 100644 --- a/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html +++ b/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json

            2025-07-27 16:34:20 UTC

            +

            deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json

            2025-07-27 16:42:50 UTC

            dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_Conv2DNchwFchw_SCALAR2832832
            diff --git a/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html b/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html index 0bc8c359..51c56d7d 100644 --- a/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json

            2025-07-27 16:34:20 UTC

            +

            deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json

            2025-07-27 16:42:50 UTC

            dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_CONV_2D_NHWC_FHWC/scalar/iterations:572.372.35
            diff --git a/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html b/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html index ad664ddc..3a396fb4 100644 --- a/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html +++ b/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json

            2025-07-27 16:34:20 UTC

            +

            deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json

            2025-07-27 16:42:50 UTC

            dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_CONV_2D_NHWC_HWCF_SCALAR32.332.322
            diff --git a/site/deeplearning/dl-op-linalg-mathexp-benchmark.html b/site/deeplearning/dl-op-linalg-mathexp-benchmark.html index 3851a1f2..a2c47a4e 100644 --- a/site/deeplearning/dl-op-linalg-mathexp-benchmark.html +++ b/site/deeplearning/dl-op-linalg-mathexp-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-mathexp-benchmark.json

            2025-07-27 16:34:20 UTC

            +

            deeplearning/dl-op-linalg-mathexp-benchmark.json

            2025-07-27 16:42:50 UTC

            dl-op-linalg-mathexp-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/scalar/iterations:54.254.255
            diff --git a/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html b/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html index f09dea3a..410a271b 100644 --- a/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html +++ b/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-mathfpow-benchmark.json

            2025-07-27 16:34:20 UTC

            +

            deeplearning/dl-op-linalg-mathfpow-benchmark.json

            2025-07-27 16:42:50 UTC

            dl-op-linalg-mathfpow-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_EXP_SCALAR0.04560.045615,225
            diff --git a/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html b/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html index 0b0192b3..27a3f3f0 100644 --- a/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html +++ b/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-mathrsqrt-benchmark.json

            2025-07-27 16:34:20 UTC

            +

            deeplearning/dl-op-linalg-mathrsqrt-benchmark.json

            2025-07-27 16:42:50 UTC

            dl-op-linalg-mathrsqrt-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_FPOW_SCALAR0.08410.08418,255
            diff --git a/site/deeplearning/dl-op-linalg-matmul-benchmark.html b/site/deeplearning/dl-op-linalg-matmul-benchmark.html index 7b53c4dd..804965bb 100644 --- a/site/deeplearning/dl-op-linalg-matmul-benchmark.html +++ b/site/deeplearning/dl-op-linalg-matmul-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-matmul-benchmark.json

            2025-07-27 16:34:20 UTC

            +

            deeplearning/dl-op-linalg-matmul-benchmark.json

            2025-07-27 16:42:50 UTC

            dl-op-linalg-matmul-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_RSQRT_SCALAR0.07280.07289,537
            diff --git a/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html b/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html index 641b2a1a..d86a0fbf 100644 --- a/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html +++ b/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.json

            2025-07-27 16:34:20 UTC

            +

            deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.json

            2025-07-27 16:42:50 UTC

            dl-op-linalg-pooling-nhwc-sum-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_MATMUL/scalar_O0/iterations:13.93e+033.93e+031
            diff --git a/site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html b/site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html index 0f501c62..6a81eb21 100644 --- a/site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-reduceaddf-benchmark.json

            2025-07-27 16:34:20 UTC

            +

            deeplearning/dl-op-linalg-reduceaddf-benchmark.json

            2025-07-27 16:42:50 UTC

            ⚠ FAILED: JSON parse error: Expecting value
            Console output
            2025-07-27T14:27:39+00:00
            diff --git a/site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html b/site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html
            index b4c3af92..21d7b2e5 100644
            --- a/site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html
            +++ b/site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html
            @@ -10,7 +10,7 @@
             .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem}
             
             
            -

            deeplearning/dl-op-linalg-reducemaxf-benchmark.json

            2025-07-27 16:34:20 UTC

            +

            deeplearning/dl-op-linalg-reducemaxf-benchmark.json

            2025-07-27 16:42:50 UTC

            ⚠ FAILED: JSON parse error: Expecting value
            Console output
            2025-07-27T14:27:39+00:00
            diff --git a/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html b/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html
            index 37c67f15..f26d3a1b 100644
            --- a/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html
            +++ b/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html
            @@ -10,7 +10,7 @@
             .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem}
             
             
            -

            deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.json

            2025-07-27 16:34:20 UTC

            +

            deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.json

            2025-07-27 16:42:50 UTC

            dl-op-linalg-softmax-exp-sum-div-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_POOLING_NHWC_SUM_SCALAR0.2330.2333,002
            diff --git a/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html b/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html index 1bff71df..b1655bf3 100644 --- a/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html +++ b/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-matmul-transpose-b-benchmark.json

            2025-07-27 16:34:20 UTC

            +

            deeplearning/dl-op-matmul-transpose-b-benchmark.json

            2025-07-27 16:42:50 UTC

            dl-op-matmul-transpose-b-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_SOFTMAXEXPSUMDIV_SCALAR0.005660.00566121,646
            diff --git a/site/deeplearning/dl-op-tosa-transpose-benchmark.html b/site/deeplearning/dl-op-tosa-transpose-benchmark.html index 84a4c5ed..5fe95a6a 100644 --- a/site/deeplearning/dl-op-tosa-transpose-benchmark.html +++ b/site/deeplearning/dl-op-tosa-transpose-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-tosa-transpose-benchmark.json

            2025-07-27 16:34:20 UTC

            +

            deeplearning/dl-op-tosa-transpose-benchmark.json

            2025-07-27 16:42:50 UTC

            dl-op-tosa-transpose-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_MATMUL_TRANSPOSE_B/scalar_O0/iterations:51.05e+031.05e+035
            diff --git a/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index d4aed250..cd76ab64 100644 --- a/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:34:20 UTC

            +

            imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:42:50 UTC

            AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_TRANSPOSE_2D/scalar_O0/iterations:526.421.45
            diff --git a/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 8c9ae38f..846f90c1 100644 --- a/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:34:20 UTC

            +

            imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:42:50 UTC

            AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.884.88143
            diff --git a/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 7fd65fc1..153a7403 100644 --- a/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:34:20 UTC

            +

            imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:42:50 UTC

            AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.884.88143
            diff --git a/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index c676577a..26603ca4 100644 --- a/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:34:20 UTC

            +

            imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:42:50 UTC

            AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.511.561
            diff --git a/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index b1917d3d..6a746e10 100644 --- a/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:34:20 UTC

            +

            imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:42:50 UTC

            AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.611.661
            diff --git a/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 4a617939..a0e016e1 100644 --- a/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:34:20 UTC

            +

            imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:42:50 UTC

            AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.874.87144
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 011ea893..01cbe4bd 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:34:20 UTC

            +

            imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:42:50 UTC

            AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.94.9144
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 52f010c6..cc213c52 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:34:20 UTC

            +

            imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:42:50 UTC

            AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.774.77147
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 2d3346e9..a4deb625 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:34:20 UTC

            +

            imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:42:50 UTC

            AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.914.91143
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 43159c79..a2c7b505 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:34:20 UTC

            +

            imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:42:50 UTC

            AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.611.660
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 9568b379..4031f8a9 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:34:20 UTC

            +

            imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:42:50 UTC

            AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.511.560
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index ae030375..6026720f 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:34:20 UTC

            +

            imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:42:50 UTC

            AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/121.521.531
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 13b04940..cc1c57b2 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:34:20 UTC

            +

            imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:42:50 UTC

            AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/121.621.632
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 406306c1..0b7702a3 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:34:20 UTC

            +

            imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:42:50 UTC

            AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/134.234.221
            diff --git a/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index e111f003..808832f6 100644 --- a/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:34:20 UTC

            +

            imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:42:50 UTC

            SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/134.334.320
            diff --git a/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 10c408f2..b563cd76 100644 --- a/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:34:20 UTC

            +

            imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:42:50 UTC

            SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.744.74148
            diff --git a/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index af5c7e04..efea4fe9 100644 --- a/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:34:20 UTC

            +

            imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:42:50 UTC

            SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.884.88144
            diff --git a/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index e5f18b08..c2163bcf 100644 --- a/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:34:20 UTC

            +

            imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:42:50 UTC

            SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.511.560
            diff --git a/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 0328281d..d2049bb4 100644 --- a/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:34:20 UTC

            +

            imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:42:50 UTC

            SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.611.661
            diff --git a/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index acff61c9..dd7cfd34 100644 --- a/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:34:20 UTC

            +

            imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:42:50 UTC

            SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/15.035.03139
            diff --git a/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 035eb658..76d12eff 100644 --- a/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:34:20 UTC

            +

            imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:42:50 UTC

            SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.894.89143
            diff --git a/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index ab06d2bd..2a066878 100644 --- a/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:34:20 UTC

            +

            imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:42:50 UTC

            SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.894.89143
            diff --git a/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 6fd53811..58eb1148 100644 --- a/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:34:20 UTC

            +

            imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:42:50 UTC

            SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.874.87144
            diff --git a/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 806bb083..b55c36a8 100644 --- a/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:34:20 UTC

            +

            imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:42:50 UTC

            SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.511.561
            diff --git a/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 5e7443e7..a1e4b9fd 100644 --- a/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:34:20 UTC

            +

            imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:42:50 UTC

            SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.611.661
            diff --git a/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 24061274..ea252e8e 100644 --- a/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:34:20 UTC

            +

            imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:42:50 UTC

            SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/121.621.631
            diff --git a/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index b7ead294..78b34ad4 100644 --- a/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:34:20 UTC

            +

            imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:42:50 UTC

            SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/121.521.532
            diff --git a/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 837e0f54..88b77b9b 100644 --- a/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:34:20 UTC

            +

            imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:42:50 UTC

            SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/141.541.517
            diff --git a/site/vectorization/vectorization_matrix.html b/site/vectorization/vectorization_matrix.html index f70f7f8a..c77a525d 100644 --- a/site/vectorization/vectorization_matrix.html +++ b/site/vectorization/vectorization_matrix.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            vectorization/vectorization_matrix.json

            2025-07-27 16:34:20 UTC

            +

            vectorization/vectorization_matrix.json

            2025-07-27 16:42:50 UTC

            vectorization_matrix.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/134.234.220
            From 6a005f14c4220751b31c158cdc9300c467630b15 Mon Sep 17 00:00:00 2001 From: LIUQyou <1343451020@qq.com> Date: Sun, 27 Jul 2025 17:02:01 +0000 Subject: [PATCH 34/52] test --- .github/workflows/bench.yml | 3 ++- site/benchmarks/2025-07-27/index.html | 5 +++-- site/deeplearning/dl-layer-ffn-benchmark.html | 2 +- site/deeplearning/dl-layer-rmsnorm-benchmark.html | 2 +- site/deeplearning/dl-layer-selfattention-benchmark.html | 2 +- site/deeplearning/dl-model-lenet-benchmark.html | 2 +- site/deeplearning/dl-model-mobilenetv3-benchmark.html | 2 +- site/deeplearning/dl-model-resnet18-benchmark.html | 2 +- site/deeplearning/dl-model-tinyllama-benchmark.html | 2 +- site/deeplearning/dl-model-whisper-benchmark.html | 2 +- site/deeplearning/dl-op-linalg-arithaddf-benchmark.html | 2 +- site/deeplearning/dl-op-linalg-arithdivf-benchmark.html | 2 +- site/deeplearning/dl-op-linalg-arithmulf-benchmark.html | 2 +- site/deeplearning/dl-op-linalg-arithnegf-benchmark.html | 2 +- site/deeplearning/dl-op-linalg-arithsubf-benchmark.html | 2 +- site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html | 2 +- .../dl-op-linalg-conv2d-nchw-fchw-benchmark.html | 2 +- .../dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html | 2 +- .../dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html | 2 +- .../dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html | 2 +- site/deeplearning/dl-op-linalg-mathexp-benchmark.html | 2 +- site/deeplearning/dl-op-linalg-mathfpow-benchmark.html | 2 +- site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html | 2 +- site/deeplearning/dl-op-linalg-matmul-benchmark.html | 2 +- .../dl-op-linalg-pooling-nhwc-sum-benchmark.html | 2 +- site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html | 2 +- site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html | 2 +- .../dl-op-linalg-softmax-exp-sum-div-benchmark.html | 2 +- site/deeplearning/dl-op-matmul-transpose-b-benchmark.html | 2 +- site/deeplearning/dl-op-tosa-transpose-benchmark.html | 2 +- 
...KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...ernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...ernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...ernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...ernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...ernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...ernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...ernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...ernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...ernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...ernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...ernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...ernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...ernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...ernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- site/vectorization/vectorization_matrix.html | 2 +- 59 files changed, 62 insertions(+), 60 
deletions(-) diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml index a88660fd..10a88e5c 100755 --- a/.github/workflows/bench.yml +++ b/.github/workflows/bench.yml @@ -57,7 +57,8 @@ jobs: working-directory: /home/quliu/buddy-complier-workspace/buddy-benchmark run: | rm -rf site - python3 scripts/logs2html.py test_result site + run_root="${{ env.BENCH_DIR }}" + python3 scripts/logs2html.py $run_root - name: Update benchmarks/latest redirect working-directory: /home/quliu/buddy-complier-workspace/buddy-benchmark/site/benchmarks diff --git a/site/benchmarks/2025-07-27/index.html b/site/benchmarks/2025-07-27/index.html index 4b22d903..25d339ca 100644 --- a/site/benchmarks/2025-07-27/index.html +++ b/site/benchmarks/2025-07-27/index.html @@ -2,11 +2,12 @@ layout: default title: Benchmark run --- +

            Benchmark results

            +
              -{% assign here = page.dir %} {% for f in site.static_files %} - {% if f.path startswith here and f.extname == ".html" and f.name != "index.html" %} + {% if f.path contains page.dir and f.name != "index.html" and f.extname == ".html" %}
            • {{ f.name }}
            • {% endif %} {% endfor %} diff --git a/site/deeplearning/dl-layer-ffn-benchmark.html b/site/deeplearning/dl-layer-ffn-benchmark.html index 94080724..27b7f635 100644 --- a/site/deeplearning/dl-layer-ffn-benchmark.html +++ b/site/deeplearning/dl-layer-ffn-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

              deeplearning/dl-layer-ffn-benchmark.json

              2025-07-27 16:42:50 UTC

              +

              deeplearning/dl-layer-ffn-benchmark.json

              2025-07-27 16:50:40 UTC

              dl-layer-ffn-benchmark.json

            NameTime (ns)CPU (ns)Iterations
            MLIR_MatMul/118.818.837,302,822
            diff --git a/site/deeplearning/dl-layer-rmsnorm-benchmark.html b/site/deeplearning/dl-layer-rmsnorm-benchmark.html index 028d8812..c3cd192b 100644 --- a/site/deeplearning/dl-layer-rmsnorm-benchmark.html +++ b/site/deeplearning/dl-layer-rmsnorm-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-layer-rmsnorm-benchmark.json

            2025-07-27 16:42:50 UTC

            +

            deeplearning/dl-layer-rmsnorm-benchmark.json

            2025-07-27 16:50:40 UTC

            dl-layer-rmsnorm-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_LAYER_FFN/Scalar0.06540.065410,762
            diff --git a/site/deeplearning/dl-layer-selfattention-benchmark.html b/site/deeplearning/dl-layer-selfattention-benchmark.html index b0094968..6c680320 100644 --- a/site/deeplearning/dl-layer-selfattention-benchmark.html +++ b/site/deeplearning/dl-layer-selfattention-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-layer-selfattention-benchmark.json

            2025-07-27 16:42:50 UTC

            +

            deeplearning/dl-layer-selfattention-benchmark.json

            2025-07-27 16:50:40 UTC

            dl-layer-selfattention-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_LAYER_RMSNORM/Scalar0.001960.00196356,202
            diff --git a/site/deeplearning/dl-model-lenet-benchmark.html b/site/deeplearning/dl-model-lenet-benchmark.html index 3fd96456..b6bfc854 100644 --- a/site/deeplearning/dl-model-lenet-benchmark.html +++ b/site/deeplearning/dl-model-lenet-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-model-lenet-benchmark.json

            2025-07-27 16:42:50 UTC

            +

            deeplearning/dl-model-lenet-benchmark.json

            2025-07-27 16:50:40 UTC

            dl-model-lenet-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_LAYER_ATTENTION/Scalar4.694.69149
            diff --git a/site/deeplearning/dl-model-mobilenetv3-benchmark.html b/site/deeplearning/dl-model-mobilenetv3-benchmark.html index f46c8ab9..a1594f80 100644 --- a/site/deeplearning/dl-model-mobilenetv3-benchmark.html +++ b/site/deeplearning/dl-model-mobilenetv3-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-model-mobilenetv3-benchmark.json

            2025-07-27 16:42:50 UTC

            +

            deeplearning/dl-model-mobilenetv3-benchmark.json

            2025-07-27 16:50:40 UTC

            dl-model-mobilenetv3-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_MODEL_LENET/Auto_Vectorization0.1650.1654,304
            diff --git a/site/deeplearning/dl-model-resnet18-benchmark.html b/site/deeplearning/dl-model-resnet18-benchmark.html index 448c20d2..8557177a 100644 --- a/site/deeplearning/dl-model-resnet18-benchmark.html +++ b/site/deeplearning/dl-model-resnet18-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-model-resnet18-benchmark.json

            2025-07-27 16:42:50 UTC

            +

            deeplearning/dl-model-resnet18-benchmark.json

            2025-07-27 16:50:40 UTC

            dl-model-resnet18-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_MobileNet_V3/BM_MobileNet_V3_scalar37.137.119
            diff --git a/site/deeplearning/dl-model-tinyllama-benchmark.html b/site/deeplearning/dl-model-tinyllama-benchmark.html index 36ebc848..b8df77fb 100644 --- a/site/deeplearning/dl-model-tinyllama-benchmark.html +++ b/site/deeplearning/dl-model-tinyllama-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-model-tinyllama-benchmark.json

            2025-07-27 16:42:50 UTC

            +

            deeplearning/dl-model-tinyllama-benchmark.json

            2025-07-27 16:50:40 UTC

            dl-model-tinyllama-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_MODEL_Resnet18/Auto_Vectorization7317231
            diff --git a/site/deeplearning/dl-model-whisper-benchmark.html b/site/deeplearning/dl-model-whisper-benchmark.html index c08b77cc..4ad859e1 100644 --- a/site/deeplearning/dl-model-whisper-benchmark.html +++ b/site/deeplearning/dl-model-whisper-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-model-whisper-benchmark.json

            2025-07-27 16:42:50 UTC

            +

            deeplearning/dl-model-whisper-benchmark.json

            2025-07-27 16:50:40 UTC

            dl-model-whisper-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_MODEL_TINYLLAMA/scalar1.39e+051.39e+051
            diff --git a/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html b/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html index f0b551e3..4196fdda 100644 --- a/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-arithaddf-benchmark.json

            2025-07-27 16:42:50 UTC

            +

            deeplearning/dl-op-linalg-arithaddf-benchmark.json

            2025-07-27 16:50:40 UTC

            dl-op-linalg-arithaddf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_MODEL_Whisper/Auto_Vectorization8e+048e+041
            diff --git a/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html b/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html index 2228a54a..3a8a4bf5 100644 --- a/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-arithdivf-benchmark.json

            2025-07-27 16:42:50 UTC

            +

            deeplearning/dl-op-linalg-arithdivf-benchmark.json

            2025-07-27 16:50:40 UTC

            dl-op-linalg-arithdivf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_ADDF_SCALAR0.02950.029523,451
            diff --git a/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html b/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html index 53c66143..ebeb7a65 100644 --- a/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-arithmulf-benchmark.json

            2025-07-27 16:42:50 UTC

            +

            deeplearning/dl-op-linalg-arithmulf-benchmark.json

            2025-07-27 16:50:40 UTC

            dl-op-linalg-arithmulf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_DIVF_SCALAR0.02980.029823,358
            diff --git a/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html b/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html index b0831594..e858a4b1 100644 --- a/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-arithnegf-benchmark.json

            2025-07-27 16:42:50 UTC

            +

            deeplearning/dl-op-linalg-arithnegf-benchmark.json

            2025-07-27 16:50:40 UTC

            dl-op-linalg-arithnegf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_MULF_SCALAR0.02980.029823,441
            diff --git a/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html b/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html index 1ff1231f..9c926d46 100644 --- a/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-arithsubf-benchmark.json

            2025-07-27 16:42:50 UTC

            +

            deeplearning/dl-op-linalg-arithsubf-benchmark.json

            2025-07-27 16:50:40 UTC

            dl-op-linalg-arithsubf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_NEGF_SCALAR0.02250.022530,969
            diff --git a/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html b/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html index c095bd98..df88f2b7 100644 --- a/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html +++ b/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-batch-matmul-benchmark.json

            2025-07-27 16:42:50 UTC

            +

            deeplearning/dl-op-linalg-batch-matmul-benchmark.json

            2025-07-27 16:50:40 UTC

            dl-op-linalg-batch-matmul-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_SUBF_SCALAR0.02940.029423,509
            diff --git a/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html b/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html index b818cdfe..3ef8dbfb 100644 --- a/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html +++ b/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.json

            2025-07-27 16:42:50 UTC

            +

            deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.json

            2025-07-27 16:50:40 UTC

            dl-op-linalg-conv2d-nchw-fchw-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_BATCH_MATMUL/Scalar/iterations:13.54e+033.54e+031
            diff --git a/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html b/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html index 945d0864..41b66c96 100644 --- a/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html +++ b/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json

            2025-07-27 16:42:50 UTC

            +

            deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json

            2025-07-27 16:50:40 UTC

            dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_Conv2DNchwFchw_SCALAR2832832
            diff --git a/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html b/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html index 51c56d7d..4f738ab5 100644 --- a/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json

            2025-07-27 16:42:50 UTC

            +

            deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json

            2025-07-27 16:50:40 UTC

            dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_CONV_2D_NHWC_FHWC/scalar/iterations:572.372.35
            diff --git a/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html b/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html index 3a396fb4..5497f182 100644 --- a/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html +++ b/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json

            2025-07-27 16:42:50 UTC

            +

            deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json

            2025-07-27 16:50:40 UTC

            dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_CONV_2D_NHWC_HWCF_SCALAR32.332.322
            diff --git a/site/deeplearning/dl-op-linalg-mathexp-benchmark.html b/site/deeplearning/dl-op-linalg-mathexp-benchmark.html index a2c47a4e..aaa16403 100644 --- a/site/deeplearning/dl-op-linalg-mathexp-benchmark.html +++ b/site/deeplearning/dl-op-linalg-mathexp-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-mathexp-benchmark.json

            2025-07-27 16:42:50 UTC

            +

            deeplearning/dl-op-linalg-mathexp-benchmark.json

            2025-07-27 16:50:40 UTC

            dl-op-linalg-mathexp-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/scalar/iterations:54.254.255
            diff --git a/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html b/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html index 410a271b..a12078b5 100644 --- a/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html +++ b/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-mathfpow-benchmark.json

            2025-07-27 16:42:50 UTC

            +

            deeplearning/dl-op-linalg-mathfpow-benchmark.json

            2025-07-27 16:50:40 UTC

            dl-op-linalg-mathfpow-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_EXP_SCALAR0.04560.045615,225
            diff --git a/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html b/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html index 27a3f3f0..a0e86ebe 100644 --- a/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html +++ b/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-mathrsqrt-benchmark.json

            2025-07-27 16:42:50 UTC

            +

            deeplearning/dl-op-linalg-mathrsqrt-benchmark.json

            2025-07-27 16:50:40 UTC

            dl-op-linalg-mathrsqrt-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_FPOW_SCALAR0.08410.08418,255
            diff --git a/site/deeplearning/dl-op-linalg-matmul-benchmark.html b/site/deeplearning/dl-op-linalg-matmul-benchmark.html index 804965bb..a46efd49 100644 --- a/site/deeplearning/dl-op-linalg-matmul-benchmark.html +++ b/site/deeplearning/dl-op-linalg-matmul-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-matmul-benchmark.json

            2025-07-27 16:42:50 UTC

            +

            deeplearning/dl-op-linalg-matmul-benchmark.json

            2025-07-27 16:50:40 UTC

            dl-op-linalg-matmul-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_RSQRT_SCALAR0.07280.07289,537
            diff --git a/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html b/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html index d86a0fbf..be7bbdd5 100644 --- a/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html +++ b/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.json

            2025-07-27 16:42:50 UTC

            +

            deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.json

            2025-07-27 16:50:40 UTC

            dl-op-linalg-pooling-nhwc-sum-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_MATMUL/scalar_O0/iterations:13.93e+033.93e+031
            diff --git a/site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html b/site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html index 6a81eb21..8ed6f3ba 100644 --- a/site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-reduceaddf-benchmark.json

            2025-07-27 16:42:50 UTC

            +

            deeplearning/dl-op-linalg-reduceaddf-benchmark.json

            2025-07-27 16:50:40 UTC

            ⚠ FAILED: JSON parse error: Expecting value
            Console output
            2025-07-27T14:27:39+00:00
            diff --git a/site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html b/site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html
            index 21d7b2e5..aef3daf2 100644
            --- a/site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html
            +++ b/site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html
            @@ -10,7 +10,7 @@
             .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem}
             
             
            -

            deeplearning/dl-op-linalg-reducemaxf-benchmark.json

            2025-07-27 16:42:50 UTC

            +

            deeplearning/dl-op-linalg-reducemaxf-benchmark.json

            2025-07-27 16:50:40 UTC

            ⚠ FAILED: JSON parse error: Expecting value
            Console output
            2025-07-27T14:27:39+00:00
            diff --git a/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html b/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html
            index f26d3a1b..49da0524 100644
            --- a/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html
            +++ b/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html
            @@ -10,7 +10,7 @@
             .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem}
             
             
            -

            deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.json

            2025-07-27 16:42:50 UTC

            +

            deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.json

            2025-07-27 16:50:40 UTC

            dl-op-linalg-softmax-exp-sum-div-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_POOLING_NHWC_SUM_SCALAR0.2330.2333,002
            diff --git a/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html b/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html index b1655bf3..36bf5b4b 100644 --- a/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html +++ b/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-matmul-transpose-b-benchmark.json

            2025-07-27 16:42:50 UTC

            +

            deeplearning/dl-op-matmul-transpose-b-benchmark.json

            2025-07-27 16:50:40 UTC

            dl-op-matmul-transpose-b-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_SOFTMAXEXPSUMDIV_SCALAR0.005660.00566121,646
            diff --git a/site/deeplearning/dl-op-tosa-transpose-benchmark.html b/site/deeplearning/dl-op-tosa-transpose-benchmark.html index 5fe95a6a..27f66b25 100644 --- a/site/deeplearning/dl-op-tosa-transpose-benchmark.html +++ b/site/deeplearning/dl-op-tosa-transpose-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-tosa-transpose-benchmark.json

            2025-07-27 16:42:50 UTC

            +

            deeplearning/dl-op-tosa-transpose-benchmark.json

            2025-07-27 16:50:40 UTC

            dl-op-tosa-transpose-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_MATMUL_TRANSPOSE_B/scalar_O0/iterations:51.05e+031.05e+035
            diff --git a/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index cd76ab64..48ecbe92 100644 --- a/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:42:50 UTC

            +

            imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:50:40 UTC

            AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_TRANSPOSE_2D/scalar_O0/iterations:526.421.45
            diff --git a/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 846f90c1..b999c76a 100644 --- a/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:42:50 UTC

            +

            imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:50:40 UTC

            AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.884.88143
            diff --git a/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 153a7403..32736e7e 100644 --- a/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:42:50 UTC

            +

            imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:50:40 UTC

            AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.884.88143
            diff --git a/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 26603ca4..ca210c9b 100644 --- a/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:42:50 UTC

            +

            imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:50:40 UTC

            AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.511.561
            diff --git a/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 6a746e10..6278f3da 100644 --- a/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:42:50 UTC

            +

            imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:50:40 UTC

            AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.611.661
            diff --git a/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index a0e016e1..dc902e6f 100644 --- a/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:42:50 UTC

            +

            imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:50:40 UTC

            AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.874.87144
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 01cbe4bd..79250a98 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:42:50 UTC

            +

            imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:50:40 UTC

            AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.94.9144
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index cc213c52..b9008521 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:42:50 UTC

            +

            imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:50:40 UTC

            AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.774.77147
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index a4deb625..ac19033a 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:42:50 UTC

            +

            imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:50:40 UTC

            AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.914.91143
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index a2c7b505..bfb7143f 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:42:50 UTC

            +

            imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:50:40 UTC

            AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.611.660
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 4031f8a9..9585044b 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:42:50 UTC

            +

            imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:50:40 UTC

            AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.511.560
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 6026720f..343bc1b8 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:42:50 UTC

            +

            imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:50:40 UTC

            AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/121.521.531
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index cc1c57b2..e8850fdd 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:42:50 UTC

            +

            imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:50:40 UTC

            AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/121.621.632
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 0b7702a3..5fdfc154 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:42:50 UTC

            +

            imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:50:40 UTC

            AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/134.234.221
            diff --git a/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 808832f6..1934206e 100644 --- a/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:42:50 UTC

            +

            imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:50:40 UTC

            SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/134.334.320
            diff --git a/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index b563cd76..e80c6249 100644 --- a/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:42:50 UTC

            +

            imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:50:40 UTC

            SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.744.74148
            diff --git a/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index efea4fe9..924105bd 100644 --- a/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:42:50 UTC

            +

            imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:50:40 UTC

            SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.884.88144
            diff --git a/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index c2163bcf..4af98efc 100644 --- a/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:42:50 UTC

            +

            imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:50:40 UTC

            SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.511.560
            diff --git a/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index d2049bb4..8c0f2c6b 100644 --- a/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:42:50 UTC

            +

            imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:50:40 UTC

            SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.611.661
            diff --git a/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index dd7cfd34..1e6ad21e 100644 --- a/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:42:50 UTC

            +

            imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:50:40 UTC

            SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/15.035.03139
            diff --git a/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 76d12eff..4228eca8 100644 --- a/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:42:50 UTC

            +

            imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:50:40 UTC

            SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.894.89143
            diff --git a/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 2a066878..45487d1b 100644 --- a/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:42:50 UTC

            +

            imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:50:40 UTC

            SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.894.89143
            diff --git a/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 58eb1148..c5c20603 100644 --- a/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:42:50 UTC

            +

            imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:50:40 UTC

            SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.874.87144
            diff --git a/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index b55c36a8..17e95ee7 100644 --- a/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:42:50 UTC

            +

            imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:50:40 UTC

            SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.511.561
            diff --git a/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index a1e4b9fd..6755990d 100644 --- a/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:42:50 UTC

            +

            imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:50:40 UTC

            SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.611.661
            diff --git a/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index ea252e8e..22c23acf 100644 --- a/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:42:50 UTC

            +

            imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:50:40 UTC

            SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/121.621.631
            diff --git a/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 78b34ad4..77e47d45 100644 --- a/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:42:50 UTC

            +

            imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:50:40 UTC

            SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/121.521.532
            diff --git a/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 88b77b9b..f03bcec8 100644 --- a/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:42:50 UTC

            +

            imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:50:40 UTC

            SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/141.541.517
            diff --git a/site/vectorization/vectorization_matrix.html b/site/vectorization/vectorization_matrix.html index c77a525d..f32e27e5 100644 --- a/site/vectorization/vectorization_matrix.html +++ b/site/vectorization/vectorization_matrix.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            vectorization/vectorization_matrix.json

            2025-07-27 16:42:50 UTC

            +

            vectorization/vectorization_matrix.json

            2025-07-27 16:50:40 UTC

            vectorization_matrix.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/134.234.220
            From d39f6a10b02b4ba9f3dc2ed019c21d6ea5aafdae Mon Sep 17 00:00:00 2001 From: LIUQyou <1343451020@qq.com> Date: Sun, 27 Jul 2025 17:04:54 +0000 Subject: [PATCH 35/52] test --- .github/workflows/bench.yml | 2 +- site/benchmarks/2025-07-27/index.html | 14 --- site/benchmarks/latest/index.html | 1 - site/deeplearning/dl-layer-ffn-benchmark.html | 37 -------- .../dl-layer-rmsnorm-benchmark.html | 37 -------- .../dl-layer-selfattention-benchmark.html | 37 -------- .../dl-model-lenet-benchmark.html | 38 -------- .../dl-model-mobilenetv3-benchmark.html | 38 -------- .../dl-model-resnet18-benchmark.html | 37 -------- .../dl-model-tinyllama-benchmark.html | 39 -------- .../dl-model-whisper-benchmark.html | 38 -------- .../dl-op-linalg-arithaddf-benchmark.html | 38 -------- .../dl-op-linalg-arithdivf-benchmark.html | 38 -------- .../dl-op-linalg-arithmulf-benchmark.html | 38 -------- .../dl-op-linalg-arithnegf-benchmark.html | 38 -------- .../dl-op-linalg-arithsubf-benchmark.html | 38 -------- .../dl-op-linalg-batch-matmul-benchmark.html | 49 ---------- ...-op-linalg-conv2d-nchw-fchw-benchmark.html | 38 -------- ...-op-linalg-conv2d-nhwc-fhwc-benchmark.html | 42 -------- ...-op-linalg-conv2d-nhwc-hwcf-benchmark.html | 38 -------- ...-depthwise-conv-2d-nhwc-hwc-benchmark.html | 39 -------- .../dl-op-linalg-mathexp-benchmark.html | 38 -------- .../dl-op-linalg-mathfpow-benchmark.html | 38 -------- .../dl-op-linalg-mathrsqrt-benchmark.html | 38 -------- .../dl-op-linalg-matmul-benchmark.html | 44 --------- ...-op-linalg-pooling-nhwc-sum-benchmark.html | 38 -------- .../dl-op-linalg-reduceaddf-benchmark.html | 26 ----- .../dl-op-linalg-reducemaxf-benchmark.html | 26 ----- ...-linalg-softmax-exp-sum-div-benchmark.html | 38 -------- .../dl-op-matmul-transpose-b-benchmark.html | 42 -------- .../dl-op-tosa-transpose-benchmark.html | 36 ------- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 95 ------------------- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 95 
------------------- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 95 ------------------- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 95 ------------------- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 95 ------------------- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 95 ------------------- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 95 ------------------- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 95 ------------------- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 95 ------------------- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 95 ------------------- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 95 ------------------- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 95 ------------------- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 95 ------------------- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 95 ------------------- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 95 ------------------- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 95 ------------------- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 95 ------------------- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 95 ------------------- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 95 ------------------- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 95 ------------------- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 95 ------------------- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 95 ------------------- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 95 ------------------- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 95 ------------------- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 95 ------------------- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 95 ------------------- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 95 ------------------- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 95 ------------------- site/vectorization/vectorization_matrix.html | 40 -------- 60 files changed, 1 insertion(+), 3777 deletions(-) delete mode 100644 
site/benchmarks/2025-07-27/index.html delete mode 100644 site/benchmarks/latest/index.html delete mode 100644 site/deeplearning/dl-layer-ffn-benchmark.html delete mode 100644 site/deeplearning/dl-layer-rmsnorm-benchmark.html delete mode 100644 site/deeplearning/dl-layer-selfattention-benchmark.html delete mode 100644 site/deeplearning/dl-model-lenet-benchmark.html delete mode 100644 site/deeplearning/dl-model-mobilenetv3-benchmark.html delete mode 100644 site/deeplearning/dl-model-resnet18-benchmark.html delete mode 100644 site/deeplearning/dl-model-tinyllama-benchmark.html delete mode 100644 site/deeplearning/dl-model-whisper-benchmark.html delete mode 100644 site/deeplearning/dl-op-linalg-arithaddf-benchmark.html delete mode 100644 site/deeplearning/dl-op-linalg-arithdivf-benchmark.html delete mode 100644 site/deeplearning/dl-op-linalg-arithmulf-benchmark.html delete mode 100644 site/deeplearning/dl-op-linalg-arithnegf-benchmark.html delete mode 100644 site/deeplearning/dl-op-linalg-arithsubf-benchmark.html delete mode 100644 site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html delete mode 100644 site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html delete mode 100644 site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html delete mode 100644 site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html delete mode 100644 site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html delete mode 100644 site/deeplearning/dl-op-linalg-mathexp-benchmark.html delete mode 100644 site/deeplearning/dl-op-linalg-mathfpow-benchmark.html delete mode 100644 site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html delete mode 100644 site/deeplearning/dl-op-linalg-matmul-benchmark.html delete mode 100644 site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html delete mode 100644 site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html delete mode 100644 site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html delete mode 100644 
site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html delete mode 100644 site/deeplearning/dl-op-matmul-transpose-b-benchmark.html delete mode 100644 site/deeplearning/dl-op-tosa-transpose-benchmark.html delete mode 100644 site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html delete mode 100644 site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html delete mode 100644 site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html delete mode 100644 site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html delete mode 100644 site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html delete mode 100644 site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html delete mode 100644 site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html delete mode 100644 site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html delete mode 100644 site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html delete mode 100644 site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html delete mode 100644 site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html delete mode 100644 site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html delete mode 100644 site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html delete mode 100644 site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html delete mode 100644 site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html delete mode 100644 
site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html delete mode 100644 site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html delete mode 100644 site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html delete mode 100644 site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html delete mode 100644 site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html delete mode 100644 site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html delete mode 100644 site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html delete mode 100644 site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html delete mode 100644 site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html delete mode 100644 site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html delete mode 100644 site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html delete mode 100644 site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html delete mode 100644 site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html delete mode 100644 site/vectorization/vectorization_matrix.html diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml index 10a88e5c..e00dbe5c 100755 --- a/.github/workflows/bench.yml +++ b/.github/workflows/bench.yml @@ -58,7 +58,7 @@ jobs: run: | rm -rf site run_root="${{ env.BENCH_DIR }}" - python3 scripts/logs2html.py $run_root + python3 scripts/logs2html.py test_result $run_root - name: Update benchmarks/latest redirect working-directory: 
/home/quliu/buddy-complier-workspace/buddy-benchmark/site/benchmarks diff --git a/site/benchmarks/2025-07-27/index.html b/site/benchmarks/2025-07-27/index.html deleted file mode 100644 index 25d339ca..00000000 --- a/site/benchmarks/2025-07-27/index.html +++ /dev/null @@ -1,14 +0,0 @@ ---- -layout: default -title: Benchmark run ---- - -

            Benchmark results

            - -
              -{% for f in site.static_files %} - {% if f.path contains page.dir and f.name != "index.html" and f.extname == ".html" %} -
            • {{ f.name }}
            • - {% endif %} -{% endfor %} -
            diff --git a/site/benchmarks/latest/index.html b/site/benchmarks/latest/index.html deleted file mode 100644 index 78e7713b..00000000 --- a/site/benchmarks/latest/index.html +++ /dev/null @@ -1 +0,0 @@ - diff --git a/site/deeplearning/dl-layer-ffn-benchmark.html b/site/deeplearning/dl-layer-ffn-benchmark.html deleted file mode 100644 index 27b7f635..00000000 --- a/site/deeplearning/dl-layer-ffn-benchmark.html +++ /dev/null @@ -1,37 +0,0 @@ - - - -

            deeplearning/dl-layer-ffn-benchmark.json

            2025-07-27 16:50:40 UTC

            -

            dl-layer-ffn-benchmark.json

            -
            NameTime (ns)CPU (ns)Iterations
            MLIR_MatMul/118.818.837,302,822
            - -
            NameTime (ms)CPU (ms)Iterations
            DL_LAYER_FFN/Scalar0.06540.065410,762
            DL_LAYER_FFN/Auto_Vectorization0.02710.027125,673
            -
            Console output -
            2025-07-27T14:26:49+00:00
            -Running ./dl-layer-ffn-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.04, 1.19, 1.31
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ---------------------------------------------------------------------------
            -Benchmark                                Time             CPU   Iterations
            ---------------------------------------------------------------------------
            -DL_LAYER_FFN/Scalar                  0.065 ms        0.065 ms        10762
            -DL_LAYER_FFN/Auto_Vectorization      0.027 ms        0.027 ms        25673
            ------------------------------------------------------------
            -Correctness Verification: PASS
            ------------------------------------------------------------
            -
            \ No newline at end of file diff --git a/site/deeplearning/dl-layer-rmsnorm-benchmark.html b/site/deeplearning/dl-layer-rmsnorm-benchmark.html deleted file mode 100644 index c3cd192b..00000000 --- a/site/deeplearning/dl-layer-rmsnorm-benchmark.html +++ /dev/null @@ -1,37 +0,0 @@ - - - -

            deeplearning/dl-layer-rmsnorm-benchmark.json

            2025-07-27 16:50:40 UTC

            -

            dl-layer-rmsnorm-benchmark.json

            - - -
            NameTime (ms)CPU (ms)Iterations
            DL_LAYER_RMSNORM/Scalar0.001960.00196356,202
            DL_LAYER_RMSNORM/Auto_Vectorization0.0009150.000915751,546
            -
            Console output -
            2025-07-27T14:26:53+00:00
            -Running ./dl-layer-rmsnorm-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.03, 1.19, 1.30
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            -------------------------------------------------------------------------------
            -Benchmark                                    Time             CPU   Iterations
            -------------------------------------------------------------------------------
            -DL_LAYER_RMSNORM/Scalar                  0.002 ms        0.002 ms       356202
            -DL_LAYER_RMSNORM/Auto_Vectorization      0.001 ms        0.001 ms       751546
            ------------------------------------------------------------
            -Correctness Verification: PASS
            ------------------------------------------------------------
            -
            \ No newline at end of file diff --git a/site/deeplearning/dl-layer-selfattention-benchmark.html b/site/deeplearning/dl-layer-selfattention-benchmark.html deleted file mode 100644 index 6c680320..00000000 --- a/site/deeplearning/dl-layer-selfattention-benchmark.html +++ /dev/null @@ -1,37 +0,0 @@ - - - -

            deeplearning/dl-layer-selfattention-benchmark.json

            2025-07-27 16:50:40 UTC

            -

            dl-layer-selfattention-benchmark.json

            - - -
            NameTime (ms)CPU (ms)Iterations
            DL_LAYER_ATTENTION/Scalar4.694.69149
            DL_LAYER_ATTENTION/Auto_Vectorization1.571.57446
            -
            Console output -
            2025-07-27T14:26:51+00:00
            -Running ./dl-layer-selfattention-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.04, 1.19, 1.31
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ---------------------------------------------------------------------------------
            -Benchmark                                      Time             CPU   Iterations
            ---------------------------------------------------------------------------------
            -DL_LAYER_ATTENTION/Scalar                   4.69 ms         4.69 ms          149
            -DL_LAYER_ATTENTION/Auto_Vectorization       1.57 ms         1.57 ms          446
            ------------------------------------------------------------
            -Correctness Verification: PASS
            ------------------------------------------------------------
            -
            \ No newline at end of file diff --git a/site/deeplearning/dl-model-lenet-benchmark.html b/site/deeplearning/dl-model-lenet-benchmark.html deleted file mode 100644 index b6bfc854..00000000 --- a/site/deeplearning/dl-model-lenet-benchmark.html +++ /dev/null @@ -1,38 +0,0 @@ - - - -

            deeplearning/dl-model-lenet-benchmark.json

            2025-07-27 16:50:40 UTC

            -

            dl-model-lenet-benchmark.json

            - - -
            NameTime (ms)CPU (ms)Iterations
            DL_MODEL_LENET/Auto_Vectorization0.1650.1654,304
            DL_MODEL_LENET/Buddy_Vectorization0.1370.1375,022
            -
            Console output -
            2025-07-27T14:22:52+00:00
            -Running ./dl-model-lenet-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.40, 1.39, 1.40
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ------------------------------------------------------------------------------
            -Benchmark                                   Time             CPU   Iterations
            ------------------------------------------------------------------------------
            -DL_MODEL_LENET/Auto_Vectorization       0.165 ms        0.165 ms         4304
            -DL_MODEL_LENET/Buddy_Vectorization      0.137 ms        0.137 ms         5022
            ------------------------------------------------------------
            -Correctness Verification:
            -Transform case: FAIL
            ------------------------------------------------------------
            -
            \ No newline at end of file diff --git a/site/deeplearning/dl-model-mobilenetv3-benchmark.html b/site/deeplearning/dl-model-mobilenetv3-benchmark.html deleted file mode 100644 index a1594f80..00000000 --- a/site/deeplearning/dl-model-mobilenetv3-benchmark.html +++ /dev/null @@ -1,38 +0,0 @@ - - - -

            deeplearning/dl-model-mobilenetv3-benchmark.json

            2025-07-27 16:50:40 UTC

            -

            dl-model-mobilenetv3-benchmark.json

            - - -
            NameTime (ms)CPU (ms)Iterations
            BM_MobileNet_V3/BM_MobileNet_V3_scalar37.137.119
            BM_MobileNet_V3/BM_MobileNet_V3_conv_opt333321
            -
            Console output -
            2025-07-27T14:22:49+00:00
            -Running ./dl-model-mobilenetv3-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.40, 1.39, 1.40
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ------------------------------------------------------------------------------------
            -Benchmark                                         Time             CPU   Iterations
            ------------------------------------------------------------------------------------
            -BM_MobileNet_V3/BM_MobileNet_V3_scalar         37.1 ms         37.1 ms           19
            -BM_MobileNet_V3/BM_MobileNet_V3_conv_opt       33.0 ms         33.0 ms           21
            ------------------------------------------------------------
            -Correctness Verification:
            -Transform case: PASS
            ------------------------------------------------------------
            -
            \ No newline at end of file diff --git a/site/deeplearning/dl-model-resnet18-benchmark.html b/site/deeplearning/dl-model-resnet18-benchmark.html deleted file mode 100644 index 8557177a..00000000 --- a/site/deeplearning/dl-model-resnet18-benchmark.html +++ /dev/null @@ -1,37 +0,0 @@ - - - -

            deeplearning/dl-model-resnet18-benchmark.json

            2025-07-27 16:50:40 UTC

            -

            dl-model-resnet18-benchmark.json

            - - -
            NameTime (ms)CPU (ms)Iterations
            DL_MODEL_Resnet18/Auto_Vectorization7317231
            DL_MODEL_Resnet18/Buddy_Vectorization7297221
            -
            Console output -
            2025-07-27T14:26:46+00:00
            -Running ./dl-model-resnet18-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.04, 1.19, 1.31
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ---------------------------------------------------------------------------------
            -Benchmark                                      Time             CPU   Iterations
            ---------------------------------------------------------------------------------
            -DL_MODEL_Resnet18/Auto_Vectorization         731 ms          723 ms            1
            -DL_MODEL_Resnet18/Buddy_Vectorization        729 ms          722 ms            1
            ------------------------------------------------------------
            -Correctness Verification: PASS
            ------------------------------------------------------------
            -
            \ No newline at end of file diff --git a/site/deeplearning/dl-model-tinyllama-benchmark.html b/site/deeplearning/dl-model-tinyllama-benchmark.html deleted file mode 100644 index b8df77fb..00000000 --- a/site/deeplearning/dl-model-tinyllama-benchmark.html +++ /dev/null @@ -1,39 +0,0 @@ - - - -

            deeplearning/dl-model-tinyllama-benchmark.json

            2025-07-27 16:50:40 UTC

            -

            dl-model-tinyllama-benchmark.json

            - - - -
            NameTime (ms)CPU (ms)Iterations
            DL_MODEL_TINYLLAMA/scalar1.39e+051.39e+051
            DL_MODEL_TINYLLAMA/matmul_opt1e+041e+041
            DL_MODEL_TINYLLAMA/matmul_opt_omp7.84e+037.2e+031
            -
            Console output -
            2025-07-27T14:17:33+00:00
            -Running ./dl-model-tinyllama-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.70, 1.92, 1.54
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            -----------------------------------------------------------------------------
            -Benchmark                                  Time             CPU   Iterations
            -----------------------------------------------------------------------------
            -DL_MODEL_TINYLLAMA/scalar             139185 ms       139179 ms            1
            -DL_MODEL_TINYLLAMA/matmul_opt          10038 ms        10038 ms            1
            -DL_MODEL_TINYLLAMA/matmul_opt_omp       7836 ms         7201 ms            1
            ----------- Verification ----------
            -matmul_opt PASS
            -matmul_opt_omp PASS
            -
            \ No newline at end of file diff --git a/site/deeplearning/dl-model-whisper-benchmark.html b/site/deeplearning/dl-model-whisper-benchmark.html deleted file mode 100644 index 4ad859e1..00000000 --- a/site/deeplearning/dl-model-whisper-benchmark.html +++ /dev/null @@ -1,38 +0,0 @@ - - - -

            deeplearning/dl-model-whisper-benchmark.json

            2025-07-27 16:50:40 UTC

            -

            dl-model-whisper-benchmark.json

            - - -
            NameTime (ms)CPU (ms)Iterations
            DL_MODEL_Whisper/Auto_Vectorization8e+048e+041
            DL_MODEL_Whisper/Buddy_Vectorization3.67e+043.67e+041
            -
            Console output -
            2025-07-27T14:22:54+00:00
            -Running ./dl-model-whisper-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.45, 1.40, 1.40
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            --------------------------------------------------------------------------------
            -Benchmark                                     Time             CPU   Iterations
            --------------------------------------------------------------------------------
            -DL_MODEL_Whisper/Auto_Vectorization       79983 ms        79980 ms            1
            -DL_MODEL_Whisper/Buddy_Vectorization      36713 ms        36700 ms            1
            ------------------------------------------------------------
            -Correctness Verification for Output1: PASS
            -Correctness Verification for Output2: FAIL
            ------------------------------------------------------------
            -
            \ No newline at end of file diff --git a/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html b/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html deleted file mode 100644 index 4196fdda..00000000 --- a/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html +++ /dev/null @@ -1,38 +0,0 @@ - - - -

            deeplearning/dl-op-linalg-arithaddf-benchmark.json

            2025-07-27 16:50:40 UTC

            -

            dl-op-linalg-arithaddf-benchmark.json

            - - -
            NameTime (ms)CPU (ms)Iterations
            BM_ADDF_SCALAR0.02950.029523,451
            BM_ADDF_AutoVectorization0.0040.004174,931
            -
            Console output -
            2025-07-27T14:27:23+00:00
            -Running ./dl-op-linalg-arithaddf-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.07, 1.18, 1.30
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ---------------------------------------------------------------------
            -Benchmark                          Time             CPU   Iterations
            ---------------------------------------------------------------------
            -BM_ADDF_SCALAR                 0.030 ms        0.030 ms        23451
            -BM_ADDF_AutoVectorization      0.004 ms        0.004 ms       174931
            ------------------------------------------------------------
            -Correctness Verification:
            -Transform case: PASS
            ------------------------------------------------------------
            -
            \ No newline at end of file diff --git a/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html b/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html deleted file mode 100644 index 3a8a4bf5..00000000 --- a/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html +++ /dev/null @@ -1,38 +0,0 @@ - - - -

            deeplearning/dl-op-linalg-arithdivf-benchmark.json

            2025-07-27 16:50:40 UTC

            -

            dl-op-linalg-arithdivf-benchmark.json

            - - -
            NameTime (ms)CPU (ms)Iterations
            BM_DIVF_SCALAR0.02980.029823,358
            BM_DIVF_AutoVectorization0.009490.0094967,517
            -
            Console output -
            2025-07-27T14:27:25+00:00
            -Running ./dl-op-linalg-arithdivf-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.07, 1.18, 1.30
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ---------------------------------------------------------------------
            -Benchmark                          Time             CPU   Iterations
            ---------------------------------------------------------------------
            -BM_DIVF_SCALAR                 0.030 ms        0.030 ms        23358
            -BM_DIVF_AutoVectorization      0.009 ms        0.009 ms        67517
            ------------------------------------------------------------
            -Correctness Verification:
            -Transform case: PASS
            ------------------------------------------------------------
            -
            \ No newline at end of file diff --git a/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html b/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html deleted file mode 100644 index ebeb7a65..00000000 --- a/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html +++ /dev/null @@ -1,38 +0,0 @@ - - - -

            deeplearning/dl-op-linalg-arithmulf-benchmark.json

            2025-07-27 16:50:40 UTC

            -

            dl-op-linalg-arithmulf-benchmark.json

            - - -
            NameTime (ms)CPU (ms)Iterations
            BM_MULF_SCALAR0.02980.029823,441
            BM_MULF_AutoVectorization0.0040.004175,263
            -
            Console output -
            2025-07-27T14:27:27+00:00
            -Running ./dl-op-linalg-arithmulf-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.07, 1.18, 1.30
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ---------------------------------------------------------------------
            -Benchmark                          Time             CPU   Iterations
            ---------------------------------------------------------------------
            -BM_MULF_SCALAR                 0.030 ms        0.030 ms        23441
            -BM_MULF_AutoVectorization      0.004 ms        0.004 ms       175263
            ------------------------------------------------------------
            -Correctness Verification:
            -Transform case: PASS
            ------------------------------------------------------------
            -
            \ No newline at end of file diff --git a/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html b/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html deleted file mode 100644 index e858a4b1..00000000 --- a/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html +++ /dev/null @@ -1,38 +0,0 @@ - - - -

            deeplearning/dl-op-linalg-arithnegf-benchmark.json

            2025-07-27 16:50:40 UTC

            -

            dl-op-linalg-arithnegf-benchmark.json

            - - -
            NameTime (ms)CPU (ms)Iterations
            BM_NEGF_SCALAR0.02250.022530,969
            BM_NEGF_AutoVectorization0.002460.00246277,205
            -
            Console output -
            2025-07-27T14:27:29+00:00
            -Running ./dl-op-linalg-arithnegf-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.07, 1.18, 1.30
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ---------------------------------------------------------------------
            -Benchmark                          Time             CPU   Iterations
            ---------------------------------------------------------------------
            -BM_NEGF_SCALAR                 0.023 ms        0.023 ms        30969
            -BM_NEGF_AutoVectorization      0.002 ms        0.002 ms       277205
            ------------------------------------------------------------
            -Correctness Verification:
            -Transform case: PASS
            ------------------------------------------------------------
            -
            \ No newline at end of file diff --git a/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html b/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html deleted file mode 100644 index 9c926d46..00000000 --- a/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html +++ /dev/null @@ -1,38 +0,0 @@ - - - -

            deeplearning/dl-op-linalg-arithsubf-benchmark.json

            2025-07-27 16:50:40 UTC

            -

            dl-op-linalg-arithsubf-benchmark.json

            - - -
            NameTime (ms)CPU (ms)Iterations
            BM_SUBF_SCALAR0.02940.029423,509
            BM_SUBF_AutoVectorization0.003990.00399175,223
            -
            Console output -
            2025-07-27T14:27:31+00:00
            -Running ./dl-op-linalg-arithsubf-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.07, 1.18, 1.30
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ---------------------------------------------------------------------
            -Benchmark                          Time             CPU   Iterations
            ---------------------------------------------------------------------
            -BM_SUBF_SCALAR                 0.029 ms        0.029 ms        23509
            -BM_SUBF_AutoVectorization      0.004 ms        0.004 ms       175223
            ------------------------------------------------------------
            -Correctness Verification:
            -Transform case: PASS
            ------------------------------------------------------------
            -
            \ No newline at end of file diff --git a/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html b/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html deleted file mode 100644 index df88f2b7..00000000 --- a/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html +++ /dev/null @@ -1,49 +0,0 @@ - - - -

            deeplearning/dl-op-linalg-batch-matmul-benchmark.json

            2025-07-27 16:50:40 UTC

            -

            dl-op-linalg-batch-matmul-benchmark.json

            - - - - - - - -
            NameTime (ms)CPU (ms)Iterations
            DL_OPS_BATCH_MATMUL/Scalar/iterations:13.54e+033.54e+031
            DL_OPS_BATCH_MATMUL/AutoVectorization/iterations:19769761
            DL_OPS_BATCH_MATMUL/Vectorization/iterations:11951951
            DL_OPS_BATCH_MATMUL/Tile/iterations:11091091
            DL_OPS_BATCH_MATMUL/SCF/iterations:11181181
            DL_OPS_BATCH_MATMUL/BROADCAST/iterations:13563561
            DL_OPS_BATCH_MATMUL/BROADCAST_OMP/iterations:16232.11
            -
            Console output -
            2025-07-27T14:27:13+00:00
            -Running ./dl-op-linalg-batch-matmul-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.08, 1.19, 1.30
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ----------------------------------------------------------------------------------------------
            -Benchmark                                                   Time             CPU   Iterations
            ----------------------------------------------------------------------------------------------
            -DL_OPS_BATCH_MATMUL/Scalar/iterations:1                  3536 ms         3536 ms            1
            -DL_OPS_BATCH_MATMUL/AutoVectorization/iterations:1        976 ms          976 ms            1
            -DL_OPS_BATCH_MATMUL/Vectorization/iterations:1            195 ms          195 ms            1
            -DL_OPS_BATCH_MATMUL/Tile/iterations:1                     109 ms          109 ms            1
            -DL_OPS_BATCH_MATMUL/SCF/iterations:1                      118 ms          118 ms            1
            -DL_OPS_BATCH_MATMUL/BROADCAST/iterations:1                356 ms          356 ms            1
            -DL_OPS_BATCH_MATMUL/BROADCAST_OMP/iterations:1           62.0 ms         32.1 ms            1
            ----------- Verification ----------
            -Tile PASS
            -SCF PASS
            -BROADCAST PASS
            -BROADCAST_OMP PASS
            -
            \ No newline at end of file diff --git a/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html b/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html deleted file mode 100644 index 3ef8dbfb..00000000 --- a/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html +++ /dev/null @@ -1,38 +0,0 @@ - - - -

            deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.json

            2025-07-27 16:50:40 UTC

            -

            dl-op-linalg-conv2d-nchw-fchw-benchmark.json

            - - -
            NameTime (ms)CPU (ms)Iterations
            BM_Conv2DNchwFchw_SCALAR2832832
            BM_Conv2DNchwFchw_Im2col6.86.8101
            -
            Console output -
            2025-07-27T14:27:06+00:00
            -Running ./dl-op-linalg-conv2d-nchw-fchw-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.10, 1.20, 1.31
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            --------------------------------------------------------------------
            -Benchmark                         Time             CPU   Iterations
            --------------------------------------------------------------------
            -BM_Conv2DNchwFchw_SCALAR        283 ms          283 ms            2
            -BM_Conv2DNchwFchw_Im2col       6.80 ms         6.80 ms          101
            ------------------------------------------------------------
            -Correctness Verification:
            -Transform case: PASS
            ------------------------------------------------------------
            -
            \ No newline at end of file diff --git a/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html b/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html deleted file mode 100644 index 41b66c96..00000000 --- a/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html +++ /dev/null @@ -1,42 +0,0 @@ - - - -

            deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json

            2025-07-27 16:50:40 UTC

            -

            dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json

            - - - - -
            NameTime (ms)CPU (ms)Iterations
            DL_OPS_CONV_2D_NHWC_FHWC/scalar/iterations:572.372.35
            DL_OPS_CONV_2D_NHWC_FHWC/auto_vectorization/iterations:59.359.355
            DL_OPS_CONV_2D_NHWC_FHWC/vectorization/iterations:51.821.825
            DL_OPS_CONV_2D_NHWC_FHWC/vec_tile/iterations:51.821.825
            -
            Console output -
            2025-07-27T14:27:10+00:00
            -Running ./dl-op-linalg-conv2d-nhwc-fhwc-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.09, 1.19, 1.30
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ----------------------------------------------------------------------------------------------------
            -Benchmark                                                         Time             CPU   Iterations
            ----------------------------------------------------------------------------------------------------
            -DL_OPS_CONV_2D_NHWC_FHWC/scalar/iterations:5                   72.3 ms         72.3 ms            5
            -DL_OPS_CONV_2D_NHWC_FHWC/auto_vectorization/iterations:5       9.35 ms         9.35 ms            5
            -DL_OPS_CONV_2D_NHWC_FHWC/vectorization/iterations:5            1.82 ms         1.82 ms            5
            -DL_OPS_CONV_2D_NHWC_FHWC/vec_tile/iterations:5                 1.82 ms         1.82 ms            5
            ----------- Verification ----------
            -auto_vectorization PASS
            -vectorization PASS
            -vec_tile PASS
            -
            \ No newline at end of file diff --git a/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html b/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html deleted file mode 100644 index 4f738ab5..00000000 --- a/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html +++ /dev/null @@ -1,38 +0,0 @@ - - - -

            deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json

            2025-07-27 16:50:40 UTC

            -

            dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json

            - - -
            NameTime (ms)CPU (ms)Iterations
            BM_CONV_2D_NHWC_HWCF_SCALAR32.332.322
            BM_CONV_2D_NHWC_HWCF_AutoVectorization6.136.13113
            -
            Console output -
            2025-07-27T14:27:08+00:00
            -Running ./dl-op-linalg-conv2d-nhwc-hwcf-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.09, 1.19, 1.30
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ----------------------------------------------------------------------------------
            -Benchmark                                       Time             CPU   Iterations
            ----------------------------------------------------------------------------------
            -BM_CONV_2D_NHWC_HWCF_SCALAR                  32.3 ms         32.3 ms           22
            -BM_CONV_2D_NHWC_HWCF_AutoVectorization       6.13 ms         6.13 ms          113
            ------------------------------------------------------------
            -Correctness Verification:
            -Transform case: PASS
            ------------------------------------------------------------
            -
            \ No newline at end of file diff --git a/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html b/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html deleted file mode 100644 index 5497f182..00000000 --- a/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html +++ /dev/null @@ -1,39 +0,0 @@ - - - -

            deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json

            2025-07-27 16:50:40 UTC

            -

            dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json

            - - - -
            NameTime (ms)CPU (ms)Iterations
            DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/scalar/iterations:54.254.255
            DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/auto_vectorization/iterations:51.711.715
            DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/vectorization/iterations:50.1250.1255
            -
            Console output -
            2025-07-27T14:27:11+00:00
            -Running ./dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.09, 1.19, 1.30
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            -------------------------------------------------------------------------------------------------------------
            -Benchmark                                                                  Time             CPU   Iterations
            -------------------------------------------------------------------------------------------------------------
            -DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/scalar/iterations:5                   4.25 ms         4.25 ms            5
            -DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/auto_vectorization/iterations:5       1.71 ms         1.71 ms            5
            -DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/vectorization/iterations:5           0.125 ms        0.125 ms            5
            ----------- Verification ----------
            -auto_vectorization PASS
            -vectorization PASS
            -
            \ No newline at end of file diff --git a/site/deeplearning/dl-op-linalg-mathexp-benchmark.html b/site/deeplearning/dl-op-linalg-mathexp-benchmark.html deleted file mode 100644 index aaa16403..00000000 --- a/site/deeplearning/dl-op-linalg-mathexp-benchmark.html +++ /dev/null @@ -1,38 +0,0 @@ - - - -

            deeplearning/dl-op-linalg-mathexp-benchmark.json

            2025-07-27 16:50:40 UTC

            -

            dl-op-linalg-mathexp-benchmark.json

            - - -
            NameTime (ms)CPU (ms)Iterations
            BM_EXP_SCALAR0.04560.045615,225
            BM_EXP_AutoVectorization0.03160.031622,248
            -
            Console output -
            2025-07-27T14:27:37+00:00
            -Running ./dl-op-linalg-mathexp-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.06, 1.18, 1.29
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            --------------------------------------------------------------------
            -Benchmark                         Time             CPU   Iterations
            --------------------------------------------------------------------
            -BM_EXP_SCALAR                 0.046 ms        0.046 ms        15225
            -BM_EXP_AutoVectorization      0.032 ms        0.032 ms        22248
            ------------------------------------------------------------
            -Correctness Verification:
            -Transform case: PASS
            ------------------------------------------------------------
            -
            \ No newline at end of file diff --git a/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html b/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html deleted file mode 100644 index a12078b5..00000000 --- a/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html +++ /dev/null @@ -1,38 +0,0 @@ - - - -

            deeplearning/dl-op-linalg-mathfpow-benchmark.json

            2025-07-27 16:50:40 UTC

            -

            dl-op-linalg-mathfpow-benchmark.json

            - - -
            NameTime (ms)CPU (ms)Iterations
            BM_FPOW_SCALAR0.08410.08418,255
            BM_FPOW_AutoVectorization0.05690.056912,305
            -
            Console output -
            2025-07-27T14:27:33+00:00
            -Running ./dl-op-linalg-mathfpow-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.06, 1.18, 1.29
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ---------------------------------------------------------------------
            -Benchmark                          Time             CPU   Iterations
            ---------------------------------------------------------------------
            -BM_FPOW_SCALAR                 0.084 ms        0.084 ms         8255
            -BM_FPOW_AutoVectorization      0.057 ms        0.057 ms        12305
            ------------------------------------------------------------
            -Correctness Verification:
            -Transform case: PASS
            ------------------------------------------------------------
            -
            \ No newline at end of file diff --git a/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html b/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html deleted file mode 100644 index a0e86ebe..00000000 --- a/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html +++ /dev/null @@ -1,38 +0,0 @@ - - - -

            deeplearning/dl-op-linalg-mathrsqrt-benchmark.json

            2025-07-27 16:50:40 UTC

            -

            dl-op-linalg-mathrsqrt-benchmark.json

            - - -
            NameTime (ms)CPU (ms)Iterations
            BM_RSQRT_SCALAR0.07280.07289,537
            BM_RSQRT_AutoVectorization0.004350.00435160,927
            -
            Console output -
            2025-07-27T14:27:35+00:00
            -Running ./dl-op-linalg-mathrsqrt-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.06, 1.18, 1.29
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ----------------------------------------------------------------------
            -Benchmark                           Time             CPU   Iterations
            ----------------------------------------------------------------------
            -BM_RSQRT_SCALAR                 0.073 ms        0.073 ms         9537
            -BM_RSQRT_AutoVectorization      0.004 ms        0.004 ms       160927
            ------------------------------------------------------------
            -Correctness Verification:
            -Transform case: PASS
            ------------------------------------------------------------
            -
            \ No newline at end of file diff --git a/site/deeplearning/dl-op-linalg-matmul-benchmark.html b/site/deeplearning/dl-op-linalg-matmul-benchmark.html deleted file mode 100644 index a46efd49..00000000 --- a/site/deeplearning/dl-op-linalg-matmul-benchmark.html +++ /dev/null @@ -1,44 +0,0 @@ - - - -

            deeplearning/dl-op-linalg-matmul-benchmark.json

            2025-07-27 16:50:40 UTC

            -

            dl-op-linalg-matmul-benchmark.json

            - - - - - -
            NameTime (ms)CPU (ms)Iterations
            DL_OPS_MATMUL/scalar_O0/iterations:13.93e+033.93e+031
            DL_OPS_MATMUL/scalar_O3/iterations:13.21e+033.21e+031
            DL_OPS_MATMUL/tile/iterations:11171171
            DL_OPS_MATMUL/vec/iterations:159.959.91
            DL_OPS_MATMUL/vec_omp/iterations:1229.111
            -
            Console output -
            2025-07-27T14:26:55+00:00
            -Running ./dl-op-linalg-matmul-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.03, 1.19, 1.30
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            --------------------------------------------------------------------------------
            -Benchmark                                     Time             CPU   Iterations
            --------------------------------------------------------------------------------
            -DL_OPS_MATMUL/scalar_O0/iterations:1       3933 ms         3933 ms            1
            -DL_OPS_MATMUL/scalar_O3/iterations:1       3212 ms         3212 ms            1
            -DL_OPS_MATMUL/tile/iterations:1             117 ms          117 ms            1
            -DL_OPS_MATMUL/vec/iterations:1             59.9 ms         59.9 ms            1
            -DL_OPS_MATMUL/vec_omp/iterations:1         22.0 ms         9.11 ms            1
            ----------- Verification ----------
            -tile PASS
            -vec PASS
            -vec_omp PASS
            -
            \ No newline at end of file diff --git a/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html b/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html deleted file mode 100644 index be7bbdd5..00000000 --- a/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html +++ /dev/null @@ -1,38 +0,0 @@ - - - -

            deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.json

            2025-07-27 16:50:40 UTC

            -

            dl-op-linalg-pooling-nhwc-sum-benchmark.json

            - - -
            NameTime (ms)CPU (ms)Iterations
            BM_POOLING_NHWC_SUM_SCALAR0.2330.2333,002
            BM_POOLING_NHWC_SUM_AutoVectorization0.04140.041416,950
            -
            Console output -
            2025-07-27T14:27:11+00:00
            -Running ./dl-op-linalg-pooling-nhwc-sum-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.09, 1.19, 1.30
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ---------------------------------------------------------------------------------
            -Benchmark                                      Time             CPU   Iterations
            ---------------------------------------------------------------------------------
            -BM_POOLING_NHWC_SUM_SCALAR                 0.233 ms        0.233 ms         3002
            -BM_POOLING_NHWC_SUM_AutoVectorization      0.041 ms        0.041 ms        16950
            ------------------------------------------------------------
            -Correctness Verification:
            -Transform case: PASS
            ------------------------------------------------------------
            -
            \ No newline at end of file diff --git a/site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html b/site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html deleted file mode 100644 index 8ed6f3ba..00000000 --- a/site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html +++ /dev/null @@ -1,26 +0,0 @@ - - - -

            deeplearning/dl-op-linalg-reduceaddf-benchmark.json

            2025-07-27 16:50:40 UTC

            -
            ⚠ FAILED: JSON parse error: Expecting value
            -
            Console output -
            2025-07-27T14:27:39+00:00
            -Running ./dl-op-linalg-reduceaddf-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.06, 1.17, 1.29
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            -
            \ No newline at end of file diff --git a/site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html b/site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html deleted file mode 100644 index aef3daf2..00000000 --- a/site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html +++ /dev/null @@ -1,26 +0,0 @@ - - - -

            deeplearning/dl-op-linalg-reducemaxf-benchmark.json

            2025-07-27 16:50:40 UTC

            -
            ⚠ FAILED: JSON parse error: Expecting value
            -
            Console output -
            2025-07-27T14:27:39+00:00
            -Running ./dl-op-linalg-reducemaxf-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.06, 1.17, 1.29
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            -
            \ No newline at end of file diff --git a/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html b/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html deleted file mode 100644 index 49da0524..00000000 --- a/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html +++ /dev/null @@ -1,38 +0,0 @@ - - - -

            deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.json

            2025-07-27 16:50:40 UTC

            -

            dl-op-linalg-softmax-exp-sum-div-benchmark.json

            - - -
            NameTime (ms)CPU (ms)Iterations
            BM_SOFTMAXEXPSUMDIV_SCALAR0.005660.00566121,646
            BM_SOFTMAXEXPSUMDIV_AutoVectorization0.003850.00385181,826
            -
            Console output -
            2025-07-27T14:27:39+00:00
            -Running ./dl-op-linalg-softmax-exp-sum-div-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.06, 1.17, 1.29
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ---------------------------------------------------------------------------------
            -Benchmark                                      Time             CPU   Iterations
            ---------------------------------------------------------------------------------
            -BM_SOFTMAXEXPSUMDIV_SCALAR                 0.006 ms        0.006 ms       121646
            -BM_SOFTMAXEXPSUMDIV_AutoVectorization      0.004 ms        0.004 ms       181826
            ------------------------------------------------------------
            -Correctness Verification:
            -Transform case: PASS
            ------------------------------------------------------------
            -
            \ No newline at end of file diff --git a/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html b/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html deleted file mode 100644 index 36bf5b4b..00000000 --- a/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html +++ /dev/null @@ -1,42 +0,0 @@ - - - -

            deeplearning/dl-op-matmul-transpose-b-benchmark.json

            2025-07-27 16:50:40 UTC

            -

            dl-op-matmul-transpose-b-benchmark.json

            - - - - -
            NameTime (ms)CPU (ms)Iterations
            DL_OPS_MATMUL_TRANSPOSE_B/scalar_O0/iterations:51.05e+031.05e+035
            DL_OPS_MATMUL_TRANSPOSE_B/scalar_O3/iterations:52782785
            DL_OPS_MATMUL_TRANSPOSE_B/scalar_O3_omp/iterations:532.322.45
            DL_OPS_MATMUL_TRANSPOSE_B/vec/iterations:585.585.55
            -
            Console output -
            2025-07-27T14:27:42+00:00
            -Running ./dl-op-matmul-transpose-b-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.06, 1.17, 1.29
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ------------------------------------------------------------------------------------------------
            -Benchmark                                                     Time             CPU   Iterations
            ------------------------------------------------------------------------------------------------
            -DL_OPS_MATMUL_TRANSPOSE_B/scalar_O0/iterations:5           1051 ms         1050 ms            5
            -DL_OPS_MATMUL_TRANSPOSE_B/scalar_O3/iterations:5            278 ms          278 ms            5
            -DL_OPS_MATMUL_TRANSPOSE_B/scalar_O3_omp/iterations:5       32.3 ms         22.4 ms            5
            -DL_OPS_MATMUL_TRANSPOSE_B/vec/iterations:5                 85.5 ms         85.5 ms            5
            ----------- Verification ----------
            -scalar_O3 PASS
            -scalar_O3_omp PASS
            -vec PASS
            -
            \ No newline at end of file diff --git a/site/deeplearning/dl-op-tosa-transpose-benchmark.html b/site/deeplearning/dl-op-tosa-transpose-benchmark.html deleted file mode 100644 index 27f66b25..00000000 --- a/site/deeplearning/dl-op-tosa-transpose-benchmark.html +++ /dev/null @@ -1,36 +0,0 @@ - - - -

            deeplearning/dl-op-tosa-transpose-benchmark.json

            2025-07-27 16:50:40 UTC

            -

            dl-op-tosa-transpose-benchmark.json

            - - -
            NameTime (ms)CPU (ms)Iterations
            DL_OPS_TRANSPOSE_2D/scalar_O0/iterations:526.421.45
            DL_OPS_TRANSPOSE_2D/scalar_O3/iterations:518.913.45
            -
            Console output -
            2025-07-27T14:27:41+00:00
            -Running ./dl-op-tosa-transpose-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.06, 1.17, 1.29
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            --------------------------------------------------------------------------------------
            -Benchmark                                           Time             CPU   Iterations
            --------------------------------------------------------------------------------------
            -DL_OPS_TRANSPOSE_2D/scalar_O0/iterations:5       26.4 ms         21.4 ms            5
            -DL_OPS_TRANSPOSE_2D/scalar_O3/iterations:5       18.9 ms         13.4 ms            5
            ----------- Verification ----------
            -scalar_O3 PASS
            -
            \ No newline at end of file diff --git a/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html deleted file mode 100644 index 48ecbe92..00000000 --- a/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ /dev/null @@ -1,95 +0,0 @@ - - - -

            imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:50:40 UTC

            -

            AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            - - - - - - - - - - - - - - - - - - - - - - -
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.884.88143
            MLIR_Conv2D/17.197.1997
            Buddy_Conv2D/10.4180.4181,675
            Buddy_Corr2D_Constant_Padding/11.061.06666
            OpenCV_Filter2D_Constant_Padding/11.861.86376
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,817
            Buddy_Resize2D_Bilinear_Interpolation/10.260.262,689
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006650.00665105,253
            OpenCV_Resize2D_Bilinear_Interpolation/10.0140.01449,956
            Buddy_Erosion2D_Constant_Padding/10.2150.2153,274
            Buddy_Dilation2D_Constant_Padding/10.2140.2143,255
            Buddy_Opening2D_Constant_Padding/10.3140.3142,261
            Buddy_Closing2D_Constant_Padding/10.3180.3182,259
            Buddy_TopHat2D_Constant_Padding/10.810.81835
            Buddy_BottomHat2D_Constant_Padding/10.7870.787848
            OpenCV_Erode2D_Constant_Padding/10.1370.1375,119
            OpenCV_Opening2D_Constant_Padding/10.2260.2263,092
            OpenCV_Closing2D_Constant_Padding/10.2270.2273,082
            OpenCV_TopHat2D_Constant_Padding/10.260.262,689
            OpenCV_BottomHat2D_Constant_Padding/10.2590.2592,705
            OpenCV_MorphGrad2D_Constant_Padding/10.2520.2522,779
            OpenCV_Dilate2D_Constant_Padding/10.1370.1375,113
            -
            Console output -
            2025-06-01T10:09:28+00:00
            -Running ./bin/image-processing-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.00, 1.13, 1.85
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ---------------------------------------------------------------------------------------------
            -Benchmark                                                  Time             CPU   Iterations
            ---------------------------------------------------------------------------------------------
            -Eigen_Convolve2D/1                                      4.88 ms         4.88 ms          143
            -MLIR_Conv2D/1                                           7.19 ms         7.19 ms           97
            -Buddy_Conv2D/1                                         0.418 ms        0.418 ms         1675
            -Buddy_Corr2D_Constant_Padding/1                         1.06 ms         1.06 ms          666
            -OpenCV_Filter2D_Constant_Padding/1                      1.86 ms         1.86 ms          376
            -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4817
            -Buddy_Resize2D_Bilinear_Interpolation/1                0.260 ms        0.260 ms         2689
            -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       105253
            -OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49956
            -Buddy_Erosion2D_Constant_Padding/1                     0.215 ms        0.215 ms         3274
            -Buddy_Dilation2D_Constant_Padding/1                    0.214 ms        0.214 ms         3255
            -Buddy_Opening2D_Constant_Padding/1                     0.314 ms        0.314 ms         2261
            -Buddy_Closing2D_Constant_Padding/1                     0.318 ms        0.318 ms         2259
            -Buddy_TopHat2D_Constant_Padding/1                      0.810 ms        0.810 ms          835
            -Buddy_BottomHat2D_Constant_Padding/1                   0.787 ms        0.787 ms          848
            -OpenCV_Erode2D_Constant_Padding/1                      0.137 ms        0.137 ms         5119
            -OpenCV_Opening2D_Constant_Padding/1                    0.226 ms        0.226 ms         3092
            -OpenCV_Closing2D_Constant_Padding/1                    0.227 ms        0.227 ms         3082
            -OpenCV_TopHat2D_Constant_Padding/1                     0.260 ms        0.260 ms         2689
            -OpenCV_BottomHat2D_Constant_Padding/1                  0.259 ms        0.259 ms         2705
            -OpenCV_MorphGrad2D_Constant_Padding/1                  0.252 ms        0.252 ms         2779
            -OpenCV_Dilate2D_Constant_Padding/1                     0.137 ms        0.137 ms         5113
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -
            \ No newline at end of file diff --git a/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html deleted file mode 100644 index b999c76a..00000000 --- a/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ /dev/null @@ -1,95 +0,0 @@ - - - -

            imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:50:40 UTC

            -

            AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            - - - - - - - - - - - - - - - - - - - - - - -
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.884.88143
            MLIR_Conv2D/17.187.1897
            Buddy_Conv2D/10.4180.4181,675
            Buddy_Corr2D_Constant_Padding/11.061.06662
            OpenCV_Filter2D_Constant_Padding/11.861.86376
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,857
            Buddy_Resize2D_Bilinear_Interpolation/10.260.262,693
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006660.00666105,155
            OpenCV_Resize2D_Bilinear_Interpolation/10.01410.014149,833
            Buddy_Erosion2D_Constant_Padding/10.2150.2153,267
            Buddy_Dilation2D_Constant_Padding/10.2140.2143,262
            Buddy_Opening2D_Constant_Padding/10.3090.3092,259
            Buddy_Closing2D_Constant_Padding/10.3110.3112,232
            Buddy_TopHat2D_Constant_Padding/10.8010.801854
            Buddy_BottomHat2D_Constant_Padding/10.7950.795833
            OpenCV_Erode2D_Constant_Padding/10.1360.1365,118
            OpenCV_Opening2D_Constant_Padding/10.2240.2243,117
            OpenCV_Closing2D_Constant_Padding/10.2260.2263,087
            OpenCV_TopHat2D_Constant_Padding/10.2560.2562,731
            OpenCV_BottomHat2D_Constant_Padding/10.260.262,693
            OpenCV_MorphGrad2D_Constant_Padding/10.2490.2492,799
            OpenCV_Dilate2D_Constant_Padding/10.1390.1395,051
            -
            Console output -
            2025-06-01T10:09:52+00:00
            -Running ./bin/image-processing-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.00, 1.12, 1.83
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ---------------------------------------------------------------------------------------------
            -Benchmark                                                  Time             CPU   Iterations
            ---------------------------------------------------------------------------------------------
            -Eigen_Convolve2D/1                                      4.88 ms         4.88 ms          143
            -MLIR_Conv2D/1                                           7.18 ms         7.18 ms           97
            -Buddy_Conv2D/1                                         0.418 ms        0.418 ms         1675
            -Buddy_Corr2D_Constant_Padding/1                         1.06 ms         1.06 ms          662
            -OpenCV_Filter2D_Constant_Padding/1                      1.86 ms         1.86 ms          376
            -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4857
            -Buddy_Resize2D_Bilinear_Interpolation/1                0.260 ms        0.260 ms         2693
            -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       105155
            -OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49833
            -Buddy_Erosion2D_Constant_Padding/1                     0.215 ms        0.215 ms         3267
            -Buddy_Dilation2D_Constant_Padding/1                    0.214 ms        0.214 ms         3262
            -Buddy_Opening2D_Constant_Padding/1                     0.309 ms        0.309 ms         2259
            -Buddy_Closing2D_Constant_Padding/1                     0.311 ms        0.311 ms         2232
            -Buddy_TopHat2D_Constant_Padding/1                      0.801 ms        0.801 ms          854
            -Buddy_BottomHat2D_Constant_Padding/1                   0.795 ms        0.795 ms          833
            -OpenCV_Erode2D_Constant_Padding/1                      0.136 ms        0.136 ms         5118
            -OpenCV_Opening2D_Constant_Padding/1                    0.224 ms        0.224 ms         3117
            -OpenCV_Closing2D_Constant_Padding/1                    0.226 ms        0.226 ms         3087
            -OpenCV_TopHat2D_Constant_Padding/1                     0.256 ms        0.256 ms         2731
            -OpenCV_BottomHat2D_Constant_Padding/1                  0.260 ms        0.260 ms         2693
            -OpenCV_MorphGrad2D_Constant_Padding/1                  0.249 ms        0.249 ms         2799
            -OpenCV_Dilate2D_Constant_Padding/1                     0.139 ms        0.139 ms         5051
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -
            \ No newline at end of file diff --git a/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html deleted file mode 100644 index 32736e7e..00000000 --- a/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ /dev/null @@ -1,95 +0,0 @@ - - - -

            imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:50:40 UTC

            -

            AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            - - - - - - - - - - - - - - - - - - - - - - -
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.511.561
            MLIR_Conv2D/1292925
            Buddy_Conv2D/11.111.11632
            Buddy_Corr2D_Constant_Padding/11.741.74400
            OpenCV_Filter2D_Constant_Padding/12.682.68262
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1420.1424,855
            Buddy_Resize2D_Bilinear_Interpolation/10.2610.2612,692
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006630.00663105,416
            OpenCV_Resize2D_Bilinear_Interpolation/10.0140.01449,870
            Buddy_Erosion2D_Constant_Padding/10.2140.2143,258
            Buddy_Dilation2D_Constant_Padding/10.2140.2143,246
            Buddy_Opening2D_Constant_Padding/10.320.322,260
            Buddy_Closing2D_Constant_Padding/10.310.312,223
            Buddy_TopHat2D_Constant_Padding/10.8060.806827
            Buddy_BottomHat2D_Constant_Padding/10.820.82852
            OpenCV_Erode2D_Constant_Padding/10.1370.1375,096
            OpenCV_Opening2D_Constant_Padding/10.2230.2233,136
            OpenCV_Closing2D_Constant_Padding/10.2270.2273,085
            OpenCV_TopHat2D_Constant_Padding/10.260.262,693
            OpenCV_BottomHat2D_Constant_Padding/10.260.262,686
            OpenCV_MorphGrad2D_Constant_Padding/10.2540.2542,746
            OpenCV_Dilate2D_Constant_Padding/10.1340.1345,208
            -
            Console output -
            2025-06-01T10:10:17+00:00
            -Running ./bin/image-processing-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.00, 1.11, 1.81
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ---------------------------------------------------------------------------------------------
            -Benchmark                                                  Time             CPU   Iterations
            ---------------------------------------------------------------------------------------------
            -Eigen_Convolve2D/1                                      11.5 ms         11.5 ms           61
            -MLIR_Conv2D/1                                           29.0 ms         29.0 ms           25
            -Buddy_Conv2D/1                                          1.11 ms         1.11 ms          632
            -Buddy_Corr2D_Constant_Padding/1                         1.74 ms         1.74 ms          400
            -OpenCV_Filter2D_Constant_Padding/1                      2.68 ms         2.68 ms          262
            -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.142 ms        0.142 ms         4855
            -Buddy_Resize2D_Bilinear_Interpolation/1                0.261 ms        0.261 ms         2692
            -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       105416
            -OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49870
            -Buddy_Erosion2D_Constant_Padding/1                     0.214 ms        0.214 ms         3258
            -Buddy_Dilation2D_Constant_Padding/1                    0.214 ms        0.214 ms         3246
            -Buddy_Opening2D_Constant_Padding/1                     0.320 ms        0.320 ms         2260
            -Buddy_Closing2D_Constant_Padding/1                     0.310 ms        0.310 ms         2223
            -Buddy_TopHat2D_Constant_Padding/1                      0.806 ms        0.806 ms          827
            -Buddy_BottomHat2D_Constant_Padding/1                   0.820 ms        0.820 ms          852
            -OpenCV_Erode2D_Constant_Padding/1                      0.137 ms        0.137 ms         5096
            -OpenCV_Opening2D_Constant_Padding/1                    0.223 ms        0.223 ms         3136
            -OpenCV_Closing2D_Constant_Padding/1                    0.227 ms        0.227 ms         3085
            -OpenCV_TopHat2D_Constant_Padding/1                     0.260 ms        0.260 ms         2693
            -OpenCV_BottomHat2D_Constant_Padding/1                  0.260 ms        0.260 ms         2686
            -OpenCV_MorphGrad2D_Constant_Padding/1                  0.254 ms        0.254 ms         2746
            -OpenCV_Dilate2D_Constant_Padding/1                     0.134 ms        0.134 ms         5208
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -
            \ No newline at end of file diff --git a/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html deleted file mode 100644 index ca210c9b..00000000 --- a/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ /dev/null @@ -1,95 +0,0 @@ - - - -

            imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:50:40 UTC

            -

            AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            - - - - - - - - - - - - - - - - - - - - - - -
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.611.661
            MLIR_Conv2D/1292925
            Buddy_Conv2D/11.021.02685
            Buddy_Corr2D_Constant_Padding/11.751.75400
            OpenCV_Filter2D_Constant_Padding/12.682.68261
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1420.1424,858
            Buddy_Resize2D_Bilinear_Interpolation/10.260.262,692
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006650.00665105,372
            OpenCV_Resize2D_Bilinear_Interpolation/10.0140.01449,847
            Buddy_Erosion2D_Constant_Padding/10.2140.2143,249
            Buddy_Dilation2D_Constant_Padding/10.2130.2133,265
            Buddy_Opening2D_Constant_Padding/10.3140.3142,214
            Buddy_Closing2D_Constant_Padding/10.3080.3082,229
            Buddy_TopHat2D_Constant_Padding/10.790.79828
            Buddy_BottomHat2D_Constant_Padding/10.7770.777854
            OpenCV_Erode2D_Constant_Padding/10.1370.1375,075
            OpenCV_Opening2D_Constant_Padding/10.2250.2253,111
            OpenCV_Closing2D_Constant_Padding/10.2290.2293,056
            OpenCV_TopHat2D_Constant_Padding/10.2620.2622,672
            OpenCV_BottomHat2D_Constant_Padding/10.2640.2642,653
            OpenCV_MorphGrad2D_Constant_Padding/10.2540.2542,750
            OpenCV_Dilate2D_Constant_Padding/10.1350.1355,201
            -
            Console output -
            2025-06-01T10:10:41+00:00
            -Running ./bin/image-processing-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.00, 1.10, 1.79
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ---------------------------------------------------------------------------------------------
            -Benchmark                                                  Time             CPU   Iterations
            ---------------------------------------------------------------------------------------------
            -Eigen_Convolve2D/1                                      11.6 ms         11.6 ms           61
            -MLIR_Conv2D/1                                           29.0 ms         29.0 ms           25
            -Buddy_Conv2D/1                                          1.02 ms         1.02 ms          685
            -Buddy_Corr2D_Constant_Padding/1                         1.75 ms         1.75 ms          400
            -OpenCV_Filter2D_Constant_Padding/1                      2.68 ms         2.68 ms          261
            -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.142 ms        0.142 ms         4858
            -Buddy_Resize2D_Bilinear_Interpolation/1                0.260 ms        0.260 ms         2692
            -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       105372
            -OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49847
            -Buddy_Erosion2D_Constant_Padding/1                     0.214 ms        0.214 ms         3249
            -Buddy_Dilation2D_Constant_Padding/1                    0.213 ms        0.213 ms         3265
            -Buddy_Opening2D_Constant_Padding/1                     0.314 ms        0.314 ms         2214
            -Buddy_Closing2D_Constant_Padding/1                     0.308 ms        0.308 ms         2229
            -Buddy_TopHat2D_Constant_Padding/1                      0.790 ms        0.790 ms          828
            -Buddy_BottomHat2D_Constant_Padding/1                   0.777 ms        0.777 ms          854
            -OpenCV_Erode2D_Constant_Padding/1                      0.137 ms        0.137 ms         5075
            -OpenCV_Opening2D_Constant_Padding/1                    0.225 ms        0.225 ms         3111
            -OpenCV_Closing2D_Constant_Padding/1                    0.229 ms        0.229 ms         3056
            -OpenCV_TopHat2D_Constant_Padding/1                     0.262 ms        0.262 ms         2672
            -OpenCV_BottomHat2D_Constant_Padding/1                  0.264 ms        0.264 ms         2653
            -OpenCV_MorphGrad2D_Constant_Padding/1                  0.254 ms        0.254 ms         2750
            -OpenCV_Dilate2D_Constant_Padding/1                     0.135 ms        0.135 ms         5201
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -
            \ No newline at end of file diff --git a/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html deleted file mode 100644 index 6278f3da..00000000 --- a/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ /dev/null @@ -1,95 +0,0 @@ - - - -

            imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:50:40 UTC

            -

            AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            - - - - - - - - - - - - - - - - - - - - - - -
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.874.87144
            MLIR_Conv2D/17.387.3895
            Buddy_Conv2D/10.3120.3122,241
            Buddy_Corr2D_Constant_Padding/10.8210.821849
            OpenCV_Filter2D_Constant_Padding/11.281.28547
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1460.1464,774
            Buddy_Resize2D_Bilinear_Interpolation/10.2670.2672,627
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.00670.0067103,069
            OpenCV_Resize2D_Bilinear_Interpolation/10.0140.01449,911
            Buddy_Erosion2D_Constant_Padding/10.2140.2143,259
            Buddy_Dilation2D_Constant_Padding/10.2150.2153,230
            Buddy_Opening2D_Constant_Padding/10.3090.3092,255
            Buddy_Closing2D_Constant_Padding/10.3090.3092,273
            Buddy_TopHat2D_Constant_Padding/10.7760.776855
            Buddy_BottomHat2D_Constant_Padding/10.7740.774856
            OpenCV_Erode2D_Constant_Padding/10.1360.1365,148
            OpenCV_Opening2D_Constant_Padding/10.2190.2193,185
            OpenCV_Closing2D_Constant_Padding/10.2230.2233,143
            OpenCV_TopHat2D_Constant_Padding/10.2590.2592,699
            OpenCV_BottomHat2D_Constant_Padding/10.2580.2582,714
            OpenCV_MorphGrad2D_Constant_Padding/10.2510.2512,791
            OpenCV_Dilate2D_Constant_Padding/10.1370.1375,112
            -
            Console output -
            2025-06-01T10:05:28+00:00
            -Running ./bin/image-processing-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.19, 1.29, 2.11
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ---------------------------------------------------------------------------------------------
            -Benchmark                                                  Time             CPU   Iterations
            ---------------------------------------------------------------------------------------------
            -Eigen_Convolve2D/1                                      4.87 ms         4.87 ms          144
            -MLIR_Conv2D/1                                           7.38 ms         7.38 ms           95
            -Buddy_Conv2D/1                                         0.312 ms        0.312 ms         2241
            -Buddy_Corr2D_Constant_Padding/1                        0.821 ms        0.821 ms          849
            -OpenCV_Filter2D_Constant_Padding/1                      1.28 ms         1.28 ms          547
            -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.146 ms        0.146 ms         4774
            -Buddy_Resize2D_Bilinear_Interpolation/1                0.267 ms        0.267 ms         2627
            -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       103069
            -OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49911
            -Buddy_Erosion2D_Constant_Padding/1                     0.214 ms        0.214 ms         3259
            -Buddy_Dilation2D_Constant_Padding/1                    0.215 ms        0.215 ms         3230
            -Buddy_Opening2D_Constant_Padding/1                     0.309 ms        0.309 ms         2255
            -Buddy_Closing2D_Constant_Padding/1                     0.309 ms        0.309 ms         2273
            -Buddy_TopHat2D_Constant_Padding/1                      0.776 ms        0.776 ms          855
            -Buddy_BottomHat2D_Constant_Padding/1                   0.774 ms        0.774 ms          856
            -OpenCV_Erode2D_Constant_Padding/1                      0.136 ms        0.136 ms         5148
            -OpenCV_Opening2D_Constant_Padding/1                    0.219 ms        0.219 ms         3185
            -OpenCV_Closing2D_Constant_Padding/1                    0.223 ms        0.223 ms         3143
            -OpenCV_TopHat2D_Constant_Padding/1                     0.259 ms        0.259 ms         2699
            -OpenCV_BottomHat2D_Constant_Padding/1                  0.258 ms        0.258 ms         2714
            -OpenCV_MorphGrad2D_Constant_Padding/1                  0.251 ms        0.251 ms         2791
            -OpenCV_Dilate2D_Constant_Padding/1                     0.137 ms        0.137 ms         5112
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -
            \ No newline at end of file diff --git a/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html deleted file mode 100644 index dc902e6f..00000000 --- a/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ /dev/null @@ -1,95 +0,0 @@ - - - -

            imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:50:40 UTC

            -

            AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            - - - - - - - - - - - - - - - - - - - - - - -
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.94.9144
            MLIR_Conv2D/17.217.2197
            Buddy_Conv2D/10.3110.312,257
            Buddy_Corr2D_Constant_Padding/10.7980.798878
            OpenCV_Filter2D_Constant_Padding/11.251.25560
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,872
            Buddy_Resize2D_Bilinear_Interpolation/10.2620.2622,654
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006660.00666105,278
            OpenCV_Resize2D_Bilinear_Interpolation/10.0140.01449,913
            Buddy_Erosion2D_Constant_Padding/10.2140.2143,221
            Buddy_Dilation2D_Constant_Padding/10.2140.2143,271
            Buddy_Opening2D_Constant_Padding/10.3160.3162,213
            Buddy_Closing2D_Constant_Padding/10.3110.3112,230
            Buddy_TopHat2D_Constant_Padding/10.80.8866
            Buddy_BottomHat2D_Constant_Padding/10.7970.797846
            OpenCV_Erode2D_Constant_Padding/10.1380.1385,058
            OpenCV_Opening2D_Constant_Padding/10.2220.2223,149
            OpenCV_Closing2D_Constant_Padding/10.2210.2213,169
            OpenCV_TopHat2D_Constant_Padding/10.2570.2572,725
            OpenCV_BottomHat2D_Constant_Padding/10.2570.2572,715
            OpenCV_MorphGrad2D_Constant_Padding/10.250.252,798
            OpenCV_Dilate2D_Constant_Padding/10.1370.1375,116
            -
            Console output -
            2025-06-01T10:05:52+00:00
            -Running ./bin/image-processing-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.20, 1.29, 2.09
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ---------------------------------------------------------------------------------------------
            -Benchmark                                                  Time             CPU   Iterations
            ---------------------------------------------------------------------------------------------
            -Eigen_Convolve2D/1                                      4.90 ms         4.90 ms          144
            -MLIR_Conv2D/1                                           7.21 ms         7.21 ms           97
            -Buddy_Conv2D/1                                         0.311 ms        0.310 ms         2257
            -Buddy_Corr2D_Constant_Padding/1                        0.798 ms        0.798 ms          878
            -OpenCV_Filter2D_Constant_Padding/1                      1.25 ms         1.25 ms          560
            -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4872
            -Buddy_Resize2D_Bilinear_Interpolation/1                0.262 ms        0.262 ms         2654
            -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       105278
            -OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49913
            -Buddy_Erosion2D_Constant_Padding/1                     0.214 ms        0.214 ms         3221
            -Buddy_Dilation2D_Constant_Padding/1                    0.214 ms        0.214 ms         3271
            -Buddy_Opening2D_Constant_Padding/1                     0.316 ms        0.316 ms         2213
            -Buddy_Closing2D_Constant_Padding/1                     0.311 ms        0.311 ms         2230
            -Buddy_TopHat2D_Constant_Padding/1                      0.800 ms        0.800 ms          866
            -Buddy_BottomHat2D_Constant_Padding/1                   0.797 ms        0.797 ms          846
            -OpenCV_Erode2D_Constant_Padding/1                      0.138 ms        0.138 ms         5058
            -OpenCV_Opening2D_Constant_Padding/1                    0.222 ms        0.222 ms         3149
            -OpenCV_Closing2D_Constant_Padding/1                    0.221 ms        0.221 ms         3169
            -OpenCV_TopHat2D_Constant_Padding/1                     0.257 ms        0.257 ms         2725
            -OpenCV_BottomHat2D_Constant_Padding/1                  0.257 ms        0.257 ms         2715
            -OpenCV_MorphGrad2D_Constant_Padding/1                  0.250 ms        0.250 ms         2798
            -OpenCV_Dilate2D_Constant_Padding/1                     0.137 ms        0.137 ms         5116
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -
            \ No newline at end of file diff --git a/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html deleted file mode 100644 index 79250a98..00000000 --- a/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ /dev/null @@ -1,95 +0,0 @@ - - - -

            imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:50:40 UTC

            -

            AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            - - - - - - - - - - - - - - - - - - - - - - -
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.774.77147
            MLIR_Conv2D/17.27.297
            Buddy_Conv2D/10.310.312,252
            Buddy_Corr2D_Constant_Padding/10.8020.802868
            OpenCV_Filter2D_Constant_Padding/11.251.25560
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,854
            Buddy_Resize2D_Bilinear_Interpolation/10.2610.2612,649
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006690.00669105,099
            OpenCV_Resize2D_Bilinear_Interpolation/10.0140.01449,931
            Buddy_Erosion2D_Constant_Padding/10.2140.2143,253
            Buddy_Dilation2D_Constant_Padding/10.2140.2143,245
            Buddy_Opening2D_Constant_Padding/10.3160.3162,229
            Buddy_Closing2D_Constant_Padding/10.3130.3132,256
            Buddy_TopHat2D_Constant_Padding/10.8040.804822
            Buddy_BottomHat2D_Constant_Padding/10.7990.799842
            OpenCV_Erode2D_Constant_Padding/10.1350.1355,153
            OpenCV_Opening2D_Constant_Padding/10.220.223,158
            OpenCV_Closing2D_Constant_Padding/10.2210.2213,163
            OpenCV_TopHat2D_Constant_Padding/10.2570.2572,727
            OpenCV_BottomHat2D_Constant_Padding/10.2550.2552,742
            OpenCV_MorphGrad2D_Constant_Padding/10.2480.2482,826
            OpenCV_Dilate2D_Constant_Padding/10.1360.1365,148
            -
            Console output -
            2025-06-01T10:06:16+00:00
            -Running ./bin/image-processing-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.13, 1.26, 2.06
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ---------------------------------------------------------------------------------------------
            -Benchmark                                                  Time             CPU   Iterations
            ---------------------------------------------------------------------------------------------
            -Eigen_Convolve2D/1                                      4.77 ms         4.77 ms          147
            -MLIR_Conv2D/1                                           7.20 ms         7.20 ms           97
            -Buddy_Conv2D/1                                         0.310 ms        0.310 ms         2252
            -Buddy_Corr2D_Constant_Padding/1                        0.802 ms        0.802 ms          868
            -OpenCV_Filter2D_Constant_Padding/1                      1.25 ms         1.25 ms          560
            -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4854
            -Buddy_Resize2D_Bilinear_Interpolation/1                0.261 ms        0.261 ms         2649
            -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       105099
            -OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49931
            -Buddy_Erosion2D_Constant_Padding/1                     0.214 ms        0.214 ms         3253
            -Buddy_Dilation2D_Constant_Padding/1                    0.214 ms        0.214 ms         3245
            -Buddy_Opening2D_Constant_Padding/1                     0.316 ms        0.316 ms         2229
            -Buddy_Closing2D_Constant_Padding/1                     0.313 ms        0.313 ms         2256
            -Buddy_TopHat2D_Constant_Padding/1                      0.804 ms        0.804 ms          822
            -Buddy_BottomHat2D_Constant_Padding/1                   0.799 ms        0.799 ms          842
            -OpenCV_Erode2D_Constant_Padding/1                      0.135 ms        0.135 ms         5153
            -OpenCV_Opening2D_Constant_Padding/1                    0.220 ms        0.220 ms         3158
            -OpenCV_Closing2D_Constant_Padding/1                    0.221 ms        0.221 ms         3163
            -OpenCV_TopHat2D_Constant_Padding/1                     0.257 ms        0.257 ms         2727
            -OpenCV_BottomHat2D_Constant_Padding/1                  0.255 ms        0.255 ms         2742
            -OpenCV_MorphGrad2D_Constant_Padding/1                  0.248 ms        0.248 ms         2826
            -OpenCV_Dilate2D_Constant_Padding/1                     0.136 ms        0.136 ms         5148
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -
            \ No newline at end of file diff --git a/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html deleted file mode 100644 index b9008521..00000000 --- a/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ /dev/null @@ -1,95 +0,0 @@ - - - -

            imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:50:40 UTC

            -

            AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            - - - - - - - - - - - - - - - - - - - - - - -
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.914.91143
            MLIR_Conv2D/17.177.1798
            Buddy_Conv2D/10.310.312,260
            Buddy_Corr2D_Constant_Padding/10.7950.795875
            OpenCV_Filter2D_Constant_Padding/11.251.25560
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,871
            Buddy_Resize2D_Bilinear_Interpolation/10.2610.2612,651
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006690.00669104,620
            OpenCV_Resize2D_Bilinear_Interpolation/10.0140.01449,783
            Buddy_Erosion2D_Constant_Padding/10.2180.2183,101
            Buddy_Dilation2D_Constant_Padding/10.2140.2143,270
            Buddy_Opening2D_Constant_Padding/10.3190.3192,180
            Buddy_Closing2D_Constant_Padding/10.3120.3122,262
            Buddy_TopHat2D_Constant_Padding/10.8140.814841
            Buddy_BottomHat2D_Constant_Padding/10.820.82849
            OpenCV_Erode2D_Constant_Padding/10.1350.1355,157
            OpenCV_Opening2D_Constant_Padding/10.2190.2193,187
            OpenCV_Closing2D_Constant_Padding/10.2180.2183,207
            OpenCV_TopHat2D_Constant_Padding/10.2550.2552,745
            OpenCV_BottomHat2D_Constant_Padding/10.2530.2532,766
            OpenCV_MorphGrad2D_Constant_Padding/10.2490.2492,808
            OpenCV_Dilate2D_Constant_Padding/10.1370.1375,102
            -
            Console output -
            2025-06-01T10:06:40+00:00
            -Running ./bin/image-processing-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.08, 1.24, 2.03
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ---------------------------------------------------------------------------------------------
            -Benchmark                                                  Time             CPU   Iterations
            ---------------------------------------------------------------------------------------------
            -Eigen_Convolve2D/1                                      4.91 ms         4.91 ms          143
            -MLIR_Conv2D/1                                           7.17 ms         7.17 ms           98
            -Buddy_Conv2D/1                                         0.310 ms        0.310 ms         2260
            -Buddy_Corr2D_Constant_Padding/1                        0.795 ms        0.795 ms          875
            -OpenCV_Filter2D_Constant_Padding/1                      1.25 ms         1.25 ms          560
            -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4871
            -Buddy_Resize2D_Bilinear_Interpolation/1                0.261 ms        0.261 ms         2651
            -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       104620
            -OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49783
            -Buddy_Erosion2D_Constant_Padding/1                     0.218 ms        0.218 ms         3101
            -Buddy_Dilation2D_Constant_Padding/1                    0.214 ms        0.214 ms         3270
            -Buddy_Opening2D_Constant_Padding/1                     0.319 ms        0.319 ms         2180
            -Buddy_Closing2D_Constant_Padding/1                     0.312 ms        0.312 ms         2262
            -Buddy_TopHat2D_Constant_Padding/1                      0.814 ms        0.814 ms          841
            -Buddy_BottomHat2D_Constant_Padding/1                   0.820 ms        0.820 ms          849
            -OpenCV_Erode2D_Constant_Padding/1                      0.135 ms        0.135 ms         5157
            -OpenCV_Opening2D_Constant_Padding/1                    0.219 ms        0.219 ms         3187
            -OpenCV_Closing2D_Constant_Padding/1                    0.218 ms        0.218 ms         3207
            -OpenCV_TopHat2D_Constant_Padding/1                     0.255 ms        0.255 ms         2745
            -OpenCV_BottomHat2D_Constant_Padding/1                  0.253 ms        0.253 ms         2766
            -OpenCV_MorphGrad2D_Constant_Padding/1                  0.249 ms        0.249 ms         2808
            -OpenCV_Dilate2D_Constant_Padding/1                     0.137 ms        0.137 ms         5102
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -
            \ No newline at end of file diff --git a/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html deleted file mode 100644 index ac19033a..00000000 --- a/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ /dev/null @@ -1,95 +0,0 @@ - - - -

            imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:50:40 UTC

            -

            AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            - - - - - - - - - - - - - - - - - - - - - - -
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.611.660
            MLIR_Conv2D/129.229.224
            Buddy_Conv2D/11.311.31536
            Buddy_Corr2D_Constant_Padding/12.332.33300
            OpenCV_Filter2D_Constant_Padding/14.114.11170
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,854
            Buddy_Resize2D_Bilinear_Interpolation/10.2610.2612,689
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006650.00665105,080
            OpenCV_Resize2D_Bilinear_Interpolation/10.0140.01449,721
            Buddy_Erosion2D_Constant_Padding/10.2160.2163,235
            Buddy_Dilation2D_Constant_Padding/10.2140.2143,260
            Buddy_Opening2D_Constant_Padding/10.310.312,249
            Buddy_Closing2D_Constant_Padding/10.3120.3122,139
            Buddy_TopHat2D_Constant_Padding/10.780.78826
            Buddy_BottomHat2D_Constant_Padding/10.7820.782830
            OpenCV_Erode2D_Constant_Padding/10.1380.1385,049
            OpenCV_Opening2D_Constant_Padding/10.2260.2263,095
            OpenCV_Closing2D_Constant_Padding/10.2250.2253,109
            OpenCV_TopHat2D_Constant_Padding/10.260.262,690
            OpenCV_BottomHat2D_Constant_Padding/10.260.262,688
            OpenCV_MorphGrad2D_Constant_Padding/10.2530.2532,759
            OpenCV_Dilate2D_Constant_Padding/10.1360.1365,116
            -
            Console output -
            2025-06-01T10:07:04+00:00
            -Running ./bin/image-processing-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.05, 1.22, 2.00
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ---------------------------------------------------------------------------------------------
            -Benchmark                                                  Time             CPU   Iterations
            ---------------------------------------------------------------------------------------------
            -Eigen_Convolve2D/1                                      11.6 ms         11.6 ms           60
            -MLIR_Conv2D/1                                           29.2 ms         29.2 ms           24
            -Buddy_Conv2D/1                                          1.31 ms         1.31 ms          536
            -Buddy_Corr2D_Constant_Padding/1                         2.33 ms         2.33 ms          300
            -OpenCV_Filter2D_Constant_Padding/1                      4.11 ms         4.11 ms          170
            -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4854
            -Buddy_Resize2D_Bilinear_Interpolation/1                0.261 ms        0.261 ms         2689
            -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       105080
            -OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49721
            -Buddy_Erosion2D_Constant_Padding/1                     0.216 ms        0.216 ms         3235
            -Buddy_Dilation2D_Constant_Padding/1                    0.214 ms        0.214 ms         3260
            -Buddy_Opening2D_Constant_Padding/1                     0.310 ms        0.310 ms         2249
            -Buddy_Closing2D_Constant_Padding/1                     0.312 ms        0.312 ms         2139
            -Buddy_TopHat2D_Constant_Padding/1                      0.780 ms        0.780 ms          826
            -Buddy_BottomHat2D_Constant_Padding/1                   0.782 ms        0.782 ms          830
            -OpenCV_Erode2D_Constant_Padding/1                      0.138 ms        0.138 ms         5049
            -OpenCV_Opening2D_Constant_Padding/1                    0.226 ms        0.226 ms         3095
            -OpenCV_Closing2D_Constant_Padding/1                    0.225 ms        0.225 ms         3109
            -OpenCV_TopHat2D_Constant_Padding/1                     0.260 ms        0.260 ms         2690
            -OpenCV_BottomHat2D_Constant_Padding/1                  0.260 ms        0.260 ms         2688
            -OpenCV_MorphGrad2D_Constant_Padding/1                  0.253 ms        0.253 ms         2759
            -OpenCV_Dilate2D_Constant_Padding/1                     0.136 ms        0.136 ms         5116
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -
            \ No newline at end of file diff --git a/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html deleted file mode 100644 index bfb7143f..00000000 --- a/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ /dev/null @@ -1,95 +0,0 @@ - - - -

            imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:50:40 UTC

            -

            AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            - - - - - - - - - - - - - - - - - - - - - - -
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.511.560
            MLIR_Conv2D/129.129.124
            Buddy_Conv2D/11.381.38508
            Buddy_Corr2D_Constant_Padding/12.322.32301
            OpenCV_Filter2D_Constant_Padding/14.14.1170
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,857
            Buddy_Resize2D_Bilinear_Interpolation/10.2610.2612,686
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006660.00666105,064
            OpenCV_Resize2D_Bilinear_Interpolation/10.0140.01449,925
            Buddy_Erosion2D_Constant_Padding/10.2140.2143,267
            Buddy_Dilation2D_Constant_Padding/10.2140.2143,269
            Buddy_Opening2D_Constant_Padding/10.3160.3162,235
            Buddy_Closing2D_Constant_Padding/10.3150.3152,209
            Buddy_TopHat2D_Constant_Padding/10.8010.801841
            Buddy_BottomHat2D_Constant_Padding/10.7850.785852
            OpenCV_Erode2D_Constant_Padding/10.1360.1365,129
            OpenCV_Opening2D_Constant_Padding/10.2250.2253,105
            OpenCV_Closing2D_Constant_Padding/10.2270.2273,082
            OpenCV_TopHat2D_Constant_Padding/10.2610.2612,679
            OpenCV_BottomHat2D_Constant_Padding/10.2620.2622,672
            OpenCV_MorphGrad2D_Constant_Padding/10.2540.2542,751
            OpenCV_Dilate2D_Constant_Padding/10.1360.1365,094
            -
            Console output -
            2025-06-01T10:07:28+00:00
            -Running ./bin/image-processing-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.03, 1.20, 1.98
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ---------------------------------------------------------------------------------------------
            -Benchmark                                                  Time             CPU   Iterations
            ---------------------------------------------------------------------------------------------
            -Eigen_Convolve2D/1                                      11.5 ms         11.5 ms           60
            -MLIR_Conv2D/1                                           29.1 ms         29.1 ms           24
            -Buddy_Conv2D/1                                          1.38 ms         1.38 ms          508
            -Buddy_Corr2D_Constant_Padding/1                         2.32 ms         2.32 ms          301
            -OpenCV_Filter2D_Constant_Padding/1                      4.10 ms         4.10 ms          170
            -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4857
            -Buddy_Resize2D_Bilinear_Interpolation/1                0.261 ms        0.261 ms         2686
            -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       105064
            -OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49925
            -Buddy_Erosion2D_Constant_Padding/1                     0.214 ms        0.214 ms         3267
            -Buddy_Dilation2D_Constant_Padding/1                    0.214 ms        0.214 ms         3269
            -Buddy_Opening2D_Constant_Padding/1                     0.316 ms        0.316 ms         2235
            -Buddy_Closing2D_Constant_Padding/1                     0.315 ms        0.315 ms         2209
            -Buddy_TopHat2D_Constant_Padding/1                      0.801 ms        0.801 ms          841
            -Buddy_BottomHat2D_Constant_Padding/1                   0.785 ms        0.785 ms          852
            -OpenCV_Erode2D_Constant_Padding/1                      0.136 ms        0.136 ms         5129
            -OpenCV_Opening2D_Constant_Padding/1                    0.225 ms        0.225 ms         3105
            -OpenCV_Closing2D_Constant_Padding/1                    0.227 ms        0.227 ms         3082
            -OpenCV_TopHat2D_Constant_Padding/1                     0.261 ms        0.261 ms         2679
            -OpenCV_BottomHat2D_Constant_Padding/1                  0.262 ms        0.262 ms         2672
            -OpenCV_MorphGrad2D_Constant_Padding/1                  0.254 ms        0.254 ms         2751
            -OpenCV_Dilate2D_Constant_Padding/1                     0.136 ms        0.136 ms         5094
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -
            \ No newline at end of file diff --git a/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html deleted file mode 100644 index 9585044b..00000000 --- a/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ /dev/null @@ -1,95 +0,0 @@ - - - -

            imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:50:40 UTC

            -

            AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            - - - - - - - - - - - - - - - - - - - - - - -
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/121.521.531
            MLIR_Conv2D/166.766.711
            Buddy_Conv2D/12.242.24312
            Buddy_Corr2D_Constant_Padding/14.674.67150
            OpenCV_Filter2D_Constant_Padding/18.618.6181
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,847
            Buddy_Resize2D_Bilinear_Interpolation/10.2610.2612,686
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006650.00665105,200
            OpenCV_Resize2D_Bilinear_Interpolation/10.01410.014149,717
            Buddy_Erosion2D_Constant_Padding/10.2130.2133,275
            Buddy_Dilation2D_Constant_Padding/10.2140.2143,279
            Buddy_Opening2D_Constant_Padding/10.310.312,249
            Buddy_Closing2D_Constant_Padding/10.3140.3142,267
            Buddy_TopHat2D_Constant_Padding/10.7890.789827
            Buddy_BottomHat2D_Constant_Padding/10.7630.763845
            OpenCV_Erode2D_Constant_Padding/10.1350.1355,188
            OpenCV_Opening2D_Constant_Padding/10.2290.2293,054
            OpenCV_Closing2D_Constant_Padding/10.2290.2293,052
            OpenCV_TopHat2D_Constant_Padding/10.2620.2622,667
            OpenCV_BottomHat2D_Constant_Padding/10.2620.2622,674
            OpenCV_MorphGrad2D_Constant_Padding/10.2540.2542,759
            OpenCV_Dilate2D_Constant_Padding/10.1370.1375,114
            -
            Console output -
            2025-06-01T10:07:52+00:00
            -Running ./bin/image-processing-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.02, 1.19, 1.96
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ---------------------------------------------------------------------------------------------
            -Benchmark                                                  Time             CPU   Iterations
            ---------------------------------------------------------------------------------------------
            -Eigen_Convolve2D/1                                      21.5 ms         21.5 ms           31
            -MLIR_Conv2D/1                                           66.7 ms         66.7 ms           11
            -Buddy_Conv2D/1                                          2.24 ms         2.24 ms          312
            -Buddy_Corr2D_Constant_Padding/1                         4.67 ms         4.67 ms          150
            -OpenCV_Filter2D_Constant_Padding/1                      8.61 ms         8.61 ms           81
            -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4847
            -Buddy_Resize2D_Bilinear_Interpolation/1                0.261 ms        0.261 ms         2686
            -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       105200
            -OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49717
            -Buddy_Erosion2D_Constant_Padding/1                     0.213 ms        0.213 ms         3275
            -Buddy_Dilation2D_Constant_Padding/1                    0.214 ms        0.214 ms         3279
            -Buddy_Opening2D_Constant_Padding/1                     0.310 ms        0.310 ms         2249
            -Buddy_Closing2D_Constant_Padding/1                     0.314 ms        0.314 ms         2267
            -Buddy_TopHat2D_Constant_Padding/1                      0.789 ms        0.789 ms          827
            -Buddy_BottomHat2D_Constant_Padding/1                   0.763 ms        0.763 ms          845
            -OpenCV_Erode2D_Constant_Padding/1                      0.135 ms        0.135 ms         5188
            -OpenCV_Opening2D_Constant_Padding/1                    0.229 ms        0.229 ms         3054
            -OpenCV_Closing2D_Constant_Padding/1                    0.229 ms        0.229 ms         3052
            -OpenCV_TopHat2D_Constant_Padding/1                     0.262 ms        0.262 ms         2667
            -OpenCV_BottomHat2D_Constant_Padding/1                  0.262 ms        0.262 ms         2674
            -OpenCV_MorphGrad2D_Constant_Padding/1                  0.254 ms        0.254 ms         2759
            -OpenCV_Dilate2D_Constant_Padding/1                     0.137 ms        0.137 ms         5114
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -
            \ No newline at end of file diff --git a/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html deleted file mode 100644 index 343bc1b8..00000000 --- a/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ /dev/null @@ -1,95 +0,0 @@ - - - -

            imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:50:40 UTC

            -

            AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            - - - - - - - - - - - - - - - - - - - - - - -
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/121.621.632
            MLIR_Conv2D/166.766.710
            Buddy_Conv2D/12.342.34299
            Buddy_Corr2D_Constant_Padding/14.674.67150
            OpenCV_Filter2D_Constant_Padding/18.68.681
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,853
            Buddy_Resize2D_Bilinear_Interpolation/10.260.262,693
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006660.00666105,142
            OpenCV_Resize2D_Bilinear_Interpolation/10.0140.01450,003
            Buddy_Erosion2D_Constant_Padding/10.2140.2143,229
            Buddy_Dilation2D_Constant_Padding/10.2170.2173,262
            Buddy_Opening2D_Constant_Padding/10.3080.3082,262
            Buddy_Closing2D_Constant_Padding/10.310.312,236
            Buddy_TopHat2D_Constant_Padding/10.7770.777855
            Buddy_BottomHat2D_Constant_Padding/10.7960.796826
            OpenCV_Erode2D_Constant_Padding/10.1360.1365,125
            OpenCV_Opening2D_Constant_Padding/10.2270.2273,079
            OpenCV_Closing2D_Constant_Padding/10.2260.2263,097
            OpenCV_TopHat2D_Constant_Padding/10.2610.2612,680
            OpenCV_BottomHat2D_Constant_Padding/10.260.262,694
            OpenCV_MorphGrad2D_Constant_Padding/10.2530.2532,766
            OpenCV_Dilate2D_Constant_Padding/10.140.144,993
            -
            Console output -
            2025-06-01T10:08:16+00:00
            -Running ./bin/image-processing-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.02, 1.17, 1.93
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ---------------------------------------------------------------------------------------------
            -Benchmark                                                  Time             CPU   Iterations
            ---------------------------------------------------------------------------------------------
            -Eigen_Convolve2D/1                                      21.6 ms         21.6 ms           32
            -MLIR_Conv2D/1                                           66.7 ms         66.7 ms           10
            -Buddy_Conv2D/1                                          2.34 ms         2.34 ms          299
            -Buddy_Corr2D_Constant_Padding/1                         4.67 ms         4.67 ms          150
            -OpenCV_Filter2D_Constant_Padding/1                      8.60 ms         8.60 ms           81
            -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4853
            -Buddy_Resize2D_Bilinear_Interpolation/1                0.260 ms        0.260 ms         2693
            -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       105142
            -OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        50003
            -Buddy_Erosion2D_Constant_Padding/1                     0.214 ms        0.214 ms         3229
            -Buddy_Dilation2D_Constant_Padding/1                    0.217 ms        0.217 ms         3262
            -Buddy_Opening2D_Constant_Padding/1                     0.308 ms        0.308 ms         2262
            -Buddy_Closing2D_Constant_Padding/1                     0.310 ms        0.310 ms         2236
            -Buddy_TopHat2D_Constant_Padding/1                      0.777 ms        0.777 ms          855
            -Buddy_BottomHat2D_Constant_Padding/1                   0.796 ms        0.796 ms          826
            -OpenCV_Erode2D_Constant_Padding/1                      0.136 ms        0.136 ms         5125
            -OpenCV_Opening2D_Constant_Padding/1                    0.227 ms        0.227 ms         3079
            -OpenCV_Closing2D_Constant_Padding/1                    0.226 ms        0.226 ms         3097
            -OpenCV_TopHat2D_Constant_Padding/1                     0.261 ms        0.261 ms         2680
            -OpenCV_BottomHat2D_Constant_Padding/1                  0.260 ms        0.260 ms         2694
            -OpenCV_MorphGrad2D_Constant_Padding/1                  0.253 ms        0.253 ms         2766
            -OpenCV_Dilate2D_Constant_Padding/1                     0.140 ms        0.140 ms         4993
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -
            \ No newline at end of file diff --git a/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html deleted file mode 100644 index e8850fdd..00000000 --- a/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ /dev/null @@ -1,95 +0,0 @@ - - - -

            imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:50:40 UTC

            -

            AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            - - - - - - - - - - - - - - - - - - - - - - -
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/134.234.221
            MLIR_Conv2D/11191196
            Buddy_Conv2D/13.913.91179
            Buddy_Corr2D_Constant_Padding/17.797.7990
            OpenCV_Filter2D_Constant_Padding/15.895.89119
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1420.1424,837
            Buddy_Resize2D_Bilinear_Interpolation/10.260.262,692
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006650.00665105,099
            OpenCV_Resize2D_Bilinear_Interpolation/10.01420.014249,521
            Buddy_Erosion2D_Constant_Padding/10.2140.2143,257
            Buddy_Dilation2D_Constant_Padding/10.2160.2153,222
            Buddy_Opening2D_Constant_Padding/10.3290.3292,228
            Buddy_Closing2D_Constant_Padding/10.3140.3132,221
            Buddy_TopHat2D_Constant_Padding/10.7890.789845
            Buddy_BottomHat2D_Constant_Padding/10.7930.793825
            OpenCV_Erode2D_Constant_Padding/10.1370.1375,117
            OpenCV_Opening2D_Constant_Padding/10.220.223,176
            OpenCV_Closing2D_Constant_Padding/10.220.223,179
            OpenCV_TopHat2D_Constant_Padding/10.2540.2542,758
            OpenCV_BottomHat2D_Constant_Padding/10.2550.2552,740
            OpenCV_MorphGrad2D_Constant_Padding/10.2510.2512,779
            OpenCV_Dilate2D_Constant_Padding/10.1350.1355,176
            -
            Console output -
            2025-06-01T10:08:40+00:00
            -Running ./bin/image-processing-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.01, 1.16, 1.90
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ---------------------------------------------------------------------------------------------
            -Benchmark                                                  Time             CPU   Iterations
            ---------------------------------------------------------------------------------------------
            -Eigen_Convolve2D/1                                      34.2 ms         34.2 ms           21
            -MLIR_Conv2D/1                                            119 ms          119 ms            6
            -Buddy_Conv2D/1                                          3.91 ms         3.91 ms          179
            -Buddy_Corr2D_Constant_Padding/1                         7.79 ms         7.79 ms           90
            -OpenCV_Filter2D_Constant_Padding/1                      5.89 ms         5.89 ms          119
            -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.142 ms        0.142 ms         4837
            -Buddy_Resize2D_Bilinear_Interpolation/1                0.260 ms        0.260 ms         2692
            -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       105099
            -OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49521
            -Buddy_Erosion2D_Constant_Padding/1                     0.214 ms        0.214 ms         3257
            -Buddy_Dilation2D_Constant_Padding/1                    0.216 ms        0.215 ms         3222
            -Buddy_Opening2D_Constant_Padding/1                     0.329 ms        0.329 ms         2228
            -Buddy_Closing2D_Constant_Padding/1                     0.314 ms        0.313 ms         2221
            -Buddy_TopHat2D_Constant_Padding/1                      0.789 ms        0.789 ms          845
            -Buddy_BottomHat2D_Constant_Padding/1                   0.793 ms        0.793 ms          825
            -OpenCV_Erode2D_Constant_Padding/1                      0.137 ms        0.137 ms         5117
            -OpenCV_Opening2D_Constant_Padding/1                    0.220 ms        0.220 ms         3176
            -OpenCV_Closing2D_Constant_Padding/1                    0.220 ms        0.220 ms         3179
            -OpenCV_TopHat2D_Constant_Padding/1                     0.254 ms        0.254 ms         2758
            -OpenCV_BottomHat2D_Constant_Padding/1                  0.255 ms        0.255 ms         2740
            -OpenCV_MorphGrad2D_Constant_Padding/1                  0.251 ms        0.251 ms         2779
            -OpenCV_Dilate2D_Constant_Padding/1                     0.135 ms        0.135 ms         5176
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -
            \ No newline at end of file diff --git a/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html deleted file mode 100644 index 5fdfc154..00000000 --- a/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ /dev/null @@ -1,95 +0,0 @@ - - - -

            imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:50:40 UTC

            -

            AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            - - - - - - - - - - - - - - - - - - - - - - -
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/134.334.320
            MLIR_Conv2D/11191196
            Buddy_Conv2D/13.983.98176
            Buddy_Corr2D_Constant_Padding/17.87.890
            OpenCV_Filter2D_Constant_Padding/15.895.89119
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,830
            Buddy_Resize2D_Bilinear_Interpolation/10.2610.2612,690
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006660.00666105,110
            OpenCV_Resize2D_Bilinear_Interpolation/10.01420.014249,449
            Buddy_Erosion2D_Constant_Padding/10.2140.2143,196
            Buddy_Dilation2D_Constant_Padding/10.2140.2143,263
            Buddy_Opening2D_Constant_Padding/10.3130.3132,208
            Buddy_Closing2D_Constant_Padding/10.3270.3272,187
            Buddy_TopHat2D_Constant_Padding/10.8030.803835
            Buddy_BottomHat2D_Constant_Padding/10.7980.798832
            OpenCV_Erode2D_Constant_Padding/10.1380.1385,076
            OpenCV_Opening2D_Constant_Padding/10.2290.2293,051
            OpenCV_Closing2D_Constant_Padding/10.230.233,037
            OpenCV_TopHat2D_Constant_Padding/10.2630.2632,671
            OpenCV_BottomHat2D_Constant_Padding/10.2630.2632,661
            OpenCV_MorphGrad2D_Constant_Padding/10.2570.2572,719
            OpenCV_Dilate2D_Constant_Padding/10.1360.1365,132
            -
            Console output -
            2025-06-01T10:09:04+00:00
            -Running ./bin/image-processing-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.00, 1.14, 1.88
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ---------------------------------------------------------------------------------------------
            -Benchmark                                                  Time             CPU   Iterations
            ---------------------------------------------------------------------------------------------
            -Eigen_Convolve2D/1                                      34.3 ms         34.3 ms           20
            -MLIR_Conv2D/1                                            119 ms          119 ms            6
            -Buddy_Conv2D/1                                          3.98 ms         3.98 ms          176
            -Buddy_Corr2D_Constant_Padding/1                         7.80 ms         7.80 ms           90
            -OpenCV_Filter2D_Constant_Padding/1                      5.89 ms         5.89 ms          119
            -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4830
            -Buddy_Resize2D_Bilinear_Interpolation/1                0.261 ms        0.261 ms         2690
            -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       105110
            -OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49449
            -Buddy_Erosion2D_Constant_Padding/1                     0.214 ms        0.214 ms         3196
            -Buddy_Dilation2D_Constant_Padding/1                    0.214 ms        0.214 ms         3263
            -Buddy_Opening2D_Constant_Padding/1                     0.313 ms        0.313 ms         2208
            -Buddy_Closing2D_Constant_Padding/1                     0.327 ms        0.327 ms         2187
            -Buddy_TopHat2D_Constant_Padding/1                      0.803 ms        0.803 ms          835
            -Buddy_BottomHat2D_Constant_Padding/1                   0.798 ms        0.798 ms          832
            -OpenCV_Erode2D_Constant_Padding/1                      0.138 ms        0.138 ms         5076
            -OpenCV_Opening2D_Constant_Padding/1                    0.229 ms        0.229 ms         3051
            -OpenCV_Closing2D_Constant_Padding/1                    0.230 ms        0.230 ms         3037
            -OpenCV_TopHat2D_Constant_Padding/1                     0.263 ms        0.263 ms         2671
            -OpenCV_BottomHat2D_Constant_Padding/1                  0.263 ms        0.263 ms         2661
            -OpenCV_MorphGrad2D_Constant_Padding/1                  0.257 ms        0.257 ms         2719
            -OpenCV_Dilate2D_Constant_Padding/1                     0.136 ms        0.136 ms         5132
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -
            \ No newline at end of file diff --git a/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html deleted file mode 100644 index 1934206e..00000000 --- a/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ /dev/null @@ -1,95 +0,0 @@ - - - -

            imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:50:40 UTC

            -

            SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            - - - - - - - - - - - - - - - - - - - - - - -
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.744.74148
            MLIR_Conv2D/17.27.297
            Buddy_Conv2D/10.7050.705994
            Buddy_Corr2D_Constant_Padding/11.071.07652
            OpenCV_Filter2D_Constant_Padding/11.861.86376
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,854
            Buddy_Resize2D_Bilinear_Interpolation/10.260.262,692
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006660.00666105,153
            OpenCV_Resize2D_Bilinear_Interpolation/10.0140.01449,887
            Buddy_Erosion2D_Constant_Padding/10.2160.2163,272
            Buddy_Dilation2D_Constant_Padding/10.2150.2153,249
            Buddy_Opening2D_Constant_Padding/10.3110.3112,259
            Buddy_Closing2D_Constant_Padding/10.3070.3072,229
            Buddy_TopHat2D_Constant_Padding/10.7770.777858
            Buddy_BottomHat2D_Constant_Padding/10.7670.767831
            OpenCV_Erode2D_Constant_Padding/10.1360.1365,114
            OpenCV_Opening2D_Constant_Padding/10.2230.2233,131
            OpenCV_Closing2D_Constant_Padding/10.2220.2223,149
            OpenCV_TopHat2D_Constant_Padding/10.2550.2552,744
            OpenCV_BottomHat2D_Constant_Padding/10.2560.2562,738
            OpenCV_MorphGrad2D_Constant_Padding/10.2480.2482,822
            OpenCV_Dilate2D_Constant_Padding/10.1370.1375,112
            -
            Console output -
            2025-06-01T10:03:44+00:00
            -Running ./bin/image-processing-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.01, 1.35, 2.23
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ---------------------------------------------------------------------------------------------
            -Benchmark                                                  Time             CPU   Iterations
            ---------------------------------------------------------------------------------------------
            -Eigen_Convolve2D/1                                      4.74 ms         4.74 ms          148
            -MLIR_Conv2D/1                                           7.20 ms         7.20 ms           97
            -Buddy_Conv2D/1                                         0.705 ms        0.705 ms          994
            -Buddy_Corr2D_Constant_Padding/1                         1.07 ms         1.07 ms          652
            -OpenCV_Filter2D_Constant_Padding/1                      1.86 ms         1.86 ms          376
            -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4854
            -Buddy_Resize2D_Bilinear_Interpolation/1                0.260 ms        0.260 ms         2692
            -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       105153
            -OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49887
            -Buddy_Erosion2D_Constant_Padding/1                     0.216 ms        0.216 ms         3272
            -Buddy_Dilation2D_Constant_Padding/1                    0.215 ms        0.215 ms         3249
            -Buddy_Opening2D_Constant_Padding/1                     0.311 ms        0.311 ms         2259
            -Buddy_Closing2D_Constant_Padding/1                     0.307 ms        0.307 ms         2229
            -Buddy_TopHat2D_Constant_Padding/1                      0.777 ms        0.777 ms          858
            -Buddy_BottomHat2D_Constant_Padding/1                   0.767 ms        0.767 ms          831
            -OpenCV_Erode2D_Constant_Padding/1                      0.136 ms        0.136 ms         5114
            -OpenCV_Opening2D_Constant_Padding/1                    0.223 ms        0.223 ms         3131
            -OpenCV_Closing2D_Constant_Padding/1                    0.222 ms        0.222 ms         3149
            -OpenCV_TopHat2D_Constant_Padding/1                     0.255 ms        0.255 ms         2744
            -OpenCV_BottomHat2D_Constant_Padding/1                  0.256 ms        0.256 ms         2738
            -OpenCV_MorphGrad2D_Constant_Padding/1                  0.248 ms        0.248 ms         2822
            -OpenCV_Dilate2D_Constant_Padding/1                     0.137 ms        0.137 ms         5112
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -
            \ No newline at end of file diff --git a/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html deleted file mode 100644 index e80c6249..00000000 --- a/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ /dev/null @@ -1,95 +0,0 @@ - - - -

            imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:50:40 UTC

            -

            SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            - - - - - - - - - - - - - - - - - - - - - - -
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.884.88144
            MLIR_Conv2D/17.217.2197
            Buddy_Conv2D/10.7070.707988
            Buddy_Corr2D_Constant_Padding/11.051.05668
            OpenCV_Filter2D_Constant_Padding/11.861.86376
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,847
            Buddy_Resize2D_Bilinear_Interpolation/10.2610.2612,676
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006660.00666104,914
            OpenCV_Resize2D_Bilinear_Interpolation/10.0140.01449,862
            Buddy_Erosion2D_Constant_Padding/10.2130.2133,188
            Buddy_Dilation2D_Constant_Padding/10.2160.2163,259
            Buddy_Opening2D_Constant_Padding/10.3170.3172,184
            Buddy_Closing2D_Constant_Padding/10.3140.3142,136
            Buddy_TopHat2D_Constant_Padding/10.7860.786814
            Buddy_BottomHat2D_Constant_Padding/10.7990.799847
            OpenCV_Erode2D_Constant_Padding/10.1390.1395,040
            OpenCV_Opening2D_Constant_Padding/10.2210.2213,163
            OpenCV_Closing2D_Constant_Padding/10.2190.2193,197
            OpenCV_TopHat2D_Constant_Padding/10.2550.2552,741
            OpenCV_BottomHat2D_Constant_Padding/10.2560.2562,735
            OpenCV_MorphGrad2D_Constant_Padding/10.2480.2482,817
            OpenCV_Dilate2D_Constant_Padding/10.1370.1375,127
            -
            Console output -
            2025-06-01T10:04:08+00:00
            -Running ./bin/image-processing-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.01, 1.32, 2.20
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ---------------------------------------------------------------------------------------------
            -Benchmark                                                  Time             CPU   Iterations
            ---------------------------------------------------------------------------------------------
            -Eigen_Convolve2D/1                                      4.88 ms         4.88 ms          144
            -MLIR_Conv2D/1                                           7.21 ms         7.21 ms           97
            -Buddy_Conv2D/1                                         0.707 ms        0.707 ms          988
            -Buddy_Corr2D_Constant_Padding/1                         1.05 ms         1.05 ms          668
            -OpenCV_Filter2D_Constant_Padding/1                      1.86 ms         1.86 ms          376
            -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4847
            -Buddy_Resize2D_Bilinear_Interpolation/1                0.261 ms        0.261 ms         2676
            -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       104914
            -OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49862
            -Buddy_Erosion2D_Constant_Padding/1                     0.213 ms        0.213 ms         3188
            -Buddy_Dilation2D_Constant_Padding/1                    0.216 ms        0.216 ms         3259
            -Buddy_Opening2D_Constant_Padding/1                     0.317 ms        0.317 ms         2184
            -Buddy_Closing2D_Constant_Padding/1                     0.314 ms        0.314 ms         2136
            -Buddy_TopHat2D_Constant_Padding/1                      0.786 ms        0.786 ms          814
            -Buddy_BottomHat2D_Constant_Padding/1                   0.799 ms        0.799 ms          847
            -OpenCV_Erode2D_Constant_Padding/1                      0.139 ms        0.139 ms         5040
            -OpenCV_Opening2D_Constant_Padding/1                    0.221 ms        0.221 ms         3163
            -OpenCV_Closing2D_Constant_Padding/1                    0.219 ms        0.219 ms         3197
            -OpenCV_TopHat2D_Constant_Padding/1                     0.255 ms        0.255 ms         2741
            -OpenCV_BottomHat2D_Constant_Padding/1                  0.256 ms        0.256 ms         2735
            -OpenCV_MorphGrad2D_Constant_Padding/1                  0.248 ms        0.248 ms         2817
            -OpenCV_Dilate2D_Constant_Padding/1                     0.137 ms        0.137 ms         5127
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -
            \ No newline at end of file diff --git a/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html deleted file mode 100644 index 924105bd..00000000 --- a/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ /dev/null @@ -1,95 +0,0 @@ - - - -

            imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:50:40 UTC

            -

            SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            - - - - - - - - - - - - - - - - - - - - - - -
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.511.560
            MLIR_Conv2D/129.129.124
            Buddy_Conv2D/12.042.04343
            Buddy_Corr2D_Constant_Padding/11.741.74400
            OpenCV_Filter2D_Constant_Padding/12.682.68261
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,858
            Buddy_Resize2D_Bilinear_Interpolation/10.2610.2612,687
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006670.00667104,992
            OpenCV_Resize2D_Bilinear_Interpolation/10.0140.01449,870
            Buddy_Erosion2D_Constant_Padding/10.2160.2163,259
            Buddy_Dilation2D_Constant_Padding/10.2160.2163,239
            Buddy_Opening2D_Constant_Padding/10.320.322,223
            Buddy_Closing2D_Constant_Padding/10.3080.3082,211
            Buddy_TopHat2D_Constant_Padding/10.7910.791836
            Buddy_BottomHat2D_Constant_Padding/10.8060.805841
            OpenCV_Erode2D_Constant_Padding/10.1380.1385,076
            OpenCV_Opening2D_Constant_Padding/10.2270.2273,086
            OpenCV_Closing2D_Constant_Padding/10.2250.2253,114
            OpenCV_TopHat2D_Constant_Padding/10.2640.2642,653
            OpenCV_BottomHat2D_Constant_Padding/10.2620.2622,674
            OpenCV_MorphGrad2D_Constant_Padding/10.2550.2552,741
            OpenCV_Dilate2D_Constant_Padding/10.1380.1385,067
            -
            Console output -
            2025-06-01T10:04:31+00:00
            -Running ./bin/image-processing-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.00, 1.30, 2.17
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ---------------------------------------------------------------------------------------------
            -Benchmark                                                  Time             CPU   Iterations
            ---------------------------------------------------------------------------------------------
            -Eigen_Convolve2D/1                                      11.5 ms         11.5 ms           60
            -MLIR_Conv2D/1                                           29.1 ms         29.1 ms           24
            -Buddy_Conv2D/1                                          2.04 ms         2.04 ms          343
            -Buddy_Corr2D_Constant_Padding/1                         1.74 ms         1.74 ms          400
            -OpenCV_Filter2D_Constant_Padding/1                      2.68 ms         2.68 ms          261
            -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4858
            -Buddy_Resize2D_Bilinear_Interpolation/1                0.261 ms        0.261 ms         2687
            -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       104992
            -OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49870
            -Buddy_Erosion2D_Constant_Padding/1                     0.216 ms        0.216 ms         3259
            -Buddy_Dilation2D_Constant_Padding/1                    0.216 ms        0.216 ms         3239
            -Buddy_Opening2D_Constant_Padding/1                     0.320 ms        0.320 ms         2223
            -Buddy_Closing2D_Constant_Padding/1                     0.308 ms        0.308 ms         2211
            -Buddy_TopHat2D_Constant_Padding/1                      0.791 ms        0.791 ms          836
            -Buddy_BottomHat2D_Constant_Padding/1                   0.806 ms        0.805 ms          841
            -OpenCV_Erode2D_Constant_Padding/1                      0.138 ms        0.138 ms         5076
            -OpenCV_Opening2D_Constant_Padding/1                    0.227 ms        0.227 ms         3086
            -OpenCV_Closing2D_Constant_Padding/1                    0.225 ms        0.225 ms         3114
            -OpenCV_TopHat2D_Constant_Padding/1                     0.264 ms        0.264 ms         2653
            -OpenCV_BottomHat2D_Constant_Padding/1                  0.262 ms        0.262 ms         2674
            -OpenCV_MorphGrad2D_Constant_Padding/1                  0.255 ms        0.255 ms         2741
            -OpenCV_Dilate2D_Constant_Padding/1                     0.138 ms        0.138 ms         5067
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -
            \ No newline at end of file diff --git a/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html deleted file mode 100644 index 4af98efc..00000000 --- a/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ /dev/null @@ -1,95 +0,0 @@ - - - -

            imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:50:40 UTC

            -

            SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            - - - - - - - - - - - - - - - - - - - - - - -
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.611.661
            MLIR_Conv2D/1292924
            Buddy_Conv2D/12.082.08337
            Buddy_Corr2D_Constant_Padding/11.751.75399
            OpenCV_Filter2D_Constant_Padding/12.682.68261
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,856
            Buddy_Resize2D_Bilinear_Interpolation/10.2610.2612,688
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006690.00669104,682
            OpenCV_Resize2D_Bilinear_Interpolation/10.01410.014149,744
            Buddy_Erosion2D_Constant_Padding/10.2190.2193,240
            Buddy_Dilation2D_Constant_Padding/10.2150.2153,213
            Buddy_Opening2D_Constant_Padding/10.3080.3082,240
            Buddy_Closing2D_Constant_Padding/10.3090.3092,269
            Buddy_TopHat2D_Constant_Padding/10.820.82841
            Buddy_BottomHat2D_Constant_Padding/10.80.8846
            OpenCV_Erode2D_Constant_Padding/10.1380.1385,072
            OpenCV_Opening2D_Constant_Padding/10.2230.2233,139
            OpenCV_Closing2D_Constant_Padding/10.2280.2283,074
            OpenCV_TopHat2D_Constant_Padding/10.2610.2612,680
            OpenCV_BottomHat2D_Constant_Padding/10.2620.2622,676
            OpenCV_MorphGrad2D_Constant_Padding/10.2540.2542,755
            OpenCV_Dilate2D_Constant_Padding/10.1350.1355,189
            -
            Console output -
            2025-06-01T10:04:55+00:00
            -Running ./bin/image-processing-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.06, 1.29, 2.15
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ---------------------------------------------------------------------------------------------
            -Benchmark                                                  Time             CPU   Iterations
            ---------------------------------------------------------------------------------------------
            -Eigen_Convolve2D/1                                      11.6 ms         11.6 ms           61
            -MLIR_Conv2D/1                                           29.0 ms         29.0 ms           24
            -Buddy_Conv2D/1                                          2.08 ms         2.08 ms          337
            -Buddy_Corr2D_Constant_Padding/1                         1.75 ms         1.75 ms          399
            -OpenCV_Filter2D_Constant_Padding/1                      2.68 ms         2.68 ms          261
            -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4856
            -Buddy_Resize2D_Bilinear_Interpolation/1                0.261 ms        0.261 ms         2688
            -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       104682
            -OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49744
            -Buddy_Erosion2D_Constant_Padding/1                     0.219 ms        0.219 ms         3240
            -Buddy_Dilation2D_Constant_Padding/1                    0.215 ms        0.215 ms         3213
            -Buddy_Opening2D_Constant_Padding/1                     0.308 ms        0.308 ms         2240
            -Buddy_Closing2D_Constant_Padding/1                     0.309 ms        0.309 ms         2269
            -Buddy_TopHat2D_Constant_Padding/1                      0.820 ms        0.820 ms          841
            -Buddy_BottomHat2D_Constant_Padding/1                   0.800 ms        0.800 ms          846
            -OpenCV_Erode2D_Constant_Padding/1                      0.138 ms        0.138 ms         5072
            -OpenCV_Opening2D_Constant_Padding/1                    0.223 ms        0.223 ms         3139
            -OpenCV_Closing2D_Constant_Padding/1                    0.228 ms        0.228 ms         3074
            -OpenCV_TopHat2D_Constant_Padding/1                     0.261 ms        0.261 ms         2680
            -OpenCV_BottomHat2D_Constant_Padding/1                  0.262 ms        0.262 ms         2676
            -OpenCV_MorphGrad2D_Constant_Padding/1                  0.254 ms        0.254 ms         2755
            -OpenCV_Dilate2D_Constant_Padding/1                     0.135 ms        0.135 ms         5189
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -
            \ No newline at end of file diff --git a/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html deleted file mode 100644 index 8c0f2c6b..00000000 --- a/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ /dev/null @@ -1,95 +0,0 @@ - - - -

            imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:50:40 UTC

            -

            SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            - - - - - - - - - - - - - - - - - - - - - - -
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/15.035.03139
            MLIR_Conv2D/17.387.3895
            Buddy_Conv2D/10.5220.5211,363
            Buddy_Corr2D_Constant_Padding/10.8140.814865
            OpenCV_Filter2D_Constant_Padding/11.281.28548
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1460.1464,815
            Buddy_Resize2D_Bilinear_Interpolation/10.2670.2672,628
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006810.00681102,923
            OpenCV_Resize2D_Bilinear_Interpolation/10.01430.014348,919
            Buddy_Erosion2D_Constant_Padding/10.2220.2223,130
            Buddy_Dilation2D_Constant_Padding/10.220.223,158
            Buddy_Opening2D_Constant_Padding/10.3340.3342,177
            Buddy_Closing2D_Constant_Padding/10.3380.3382,114
            Buddy_TopHat2D_Constant_Padding/10.8930.893734
            Buddy_BottomHat2D_Constant_Padding/10.8990.899761
            OpenCV_Erode2D_Constant_Padding/10.1390.1395,020
            OpenCV_Opening2D_Constant_Padding/10.2170.2173,210
            OpenCV_Closing2D_Constant_Padding/10.220.223,180
            OpenCV_TopHat2D_Constant_Padding/10.2550.2552,639
            OpenCV_BottomHat2D_Constant_Padding/10.2560.2562,732
            OpenCV_MorphGrad2D_Constant_Padding/10.2470.2472,827
            OpenCV_Dilate2D_Constant_Padding/10.1360.1365,123
            -
            Console output -
            2025-06-01T09:59:45+00:00
            -Running ./bin/image-processing-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.45, 1.75, 2.60
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ---------------------------------------------------------------------------------------------
            -Benchmark                                                  Time             CPU   Iterations
            ---------------------------------------------------------------------------------------------
            -Eigen_Convolve2D/1                                      5.03 ms         5.03 ms          139
            -MLIR_Conv2D/1                                           7.38 ms         7.38 ms           95
            -Buddy_Conv2D/1                                         0.522 ms        0.521 ms         1363
            -Buddy_Corr2D_Constant_Padding/1                        0.814 ms        0.814 ms          865
            -OpenCV_Filter2D_Constant_Padding/1                      1.28 ms         1.28 ms          548
            -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.146 ms        0.146 ms         4815
            -Buddy_Resize2D_Bilinear_Interpolation/1                0.267 ms        0.267 ms         2628
            -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       102923
            -OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        48919
            -Buddy_Erosion2D_Constant_Padding/1                     0.222 ms        0.222 ms         3130
            -Buddy_Dilation2D_Constant_Padding/1                    0.220 ms        0.220 ms         3158
            -Buddy_Opening2D_Constant_Padding/1                     0.334 ms        0.334 ms         2177
            -Buddy_Closing2D_Constant_Padding/1                     0.338 ms        0.338 ms         2114
            -Buddy_TopHat2D_Constant_Padding/1                      0.893 ms        0.893 ms          734
            -Buddy_BottomHat2D_Constant_Padding/1                   0.899 ms        0.899 ms          761
            -OpenCV_Erode2D_Constant_Padding/1                      0.139 ms        0.139 ms         5020
            -OpenCV_Opening2D_Constant_Padding/1                    0.217 ms        0.217 ms         3210
            -OpenCV_Closing2D_Constant_Padding/1                    0.220 ms        0.220 ms         3180
            -OpenCV_TopHat2D_Constant_Padding/1                     0.255 ms        0.255 ms         2639
            -OpenCV_BottomHat2D_Constant_Padding/1                  0.256 ms        0.256 ms         2732
            -OpenCV_MorphGrad2D_Constant_Padding/1                  0.247 ms        0.247 ms         2827
            -OpenCV_Dilate2D_Constant_Padding/1                     0.136 ms        0.136 ms         5123
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -
            \ No newline at end of file diff --git a/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html deleted file mode 100644 index 1e6ad21e..00000000 --- a/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ /dev/null @@ -1,95 +0,0 @@ - - - -

            imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:50:40 UTC

            -

            SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            - - - - - - - - - - - - - - - - - - - - - - -
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.894.89143
            MLIR_Conv2D/17.197.1997
            Buddy_Conv2D/10.5240.5241,337
            Buddy_Corr2D_Constant_Padding/10.7920.792882
            OpenCV_Filter2D_Constant_Padding/11.251.25561
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,818
            Buddy_Resize2D_Bilinear_Interpolation/10.2610.262,683
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006690.00669104,687
            OpenCV_Resize2D_Bilinear_Interpolation/10.0140.01449,857
            Buddy_Erosion2D_Constant_Padding/10.2210.2213,251
            Buddy_Dilation2D_Constant_Padding/10.2140.2143,237
            Buddy_Opening2D_Constant_Padding/10.3230.3232,241
            Buddy_Closing2D_Constant_Padding/10.3080.3082,271
            Buddy_TopHat2D_Constant_Padding/10.8050.805841
            Buddy_BottomHat2D_Constant_Padding/10.8090.809846
            OpenCV_Erode2D_Constant_Padding/10.1360.1365,105
            OpenCV_Opening2D_Constant_Padding/10.2170.2173,219
            OpenCV_Closing2D_Constant_Padding/10.2170.2173,216
            OpenCV_TopHat2D_Constant_Padding/10.2580.2582,710
            OpenCV_BottomHat2D_Constant_Padding/10.2560.2562,740
            OpenCV_MorphGrad2D_Constant_Padding/10.250.252,803
            OpenCV_Dilate2D_Constant_Padding/10.1370.1375,105
            -
            Console output -
            2025-06-01T10:00:09+00:00
            -Running ./bin/image-processing-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.48, 1.73, 2.57
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ---------------------------------------------------------------------------------------------
            -Benchmark                                                  Time             CPU   Iterations
            ---------------------------------------------------------------------------------------------
            -Eigen_Convolve2D/1                                      4.89 ms         4.89 ms          143
            -MLIR_Conv2D/1                                           7.19 ms         7.19 ms           97
            -Buddy_Conv2D/1                                         0.524 ms        0.524 ms         1337
            -Buddy_Corr2D_Constant_Padding/1                        0.792 ms        0.792 ms          882
            -OpenCV_Filter2D_Constant_Padding/1                      1.25 ms         1.25 ms          561
            -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4818
            -Buddy_Resize2D_Bilinear_Interpolation/1                0.261 ms        0.260 ms         2683
            -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       104687
            -OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49857
            -Buddy_Erosion2D_Constant_Padding/1                     0.221 ms        0.221 ms         3251
            -Buddy_Dilation2D_Constant_Padding/1                    0.214 ms        0.214 ms         3237
            -Buddy_Opening2D_Constant_Padding/1                     0.323 ms        0.323 ms         2241
            -Buddy_Closing2D_Constant_Padding/1                     0.308 ms        0.308 ms         2271
            -Buddy_TopHat2D_Constant_Padding/1                      0.805 ms        0.805 ms          841
            -Buddy_BottomHat2D_Constant_Padding/1                   0.809 ms        0.809 ms          846
            -OpenCV_Erode2D_Constant_Padding/1                      0.136 ms        0.136 ms         5105
            -OpenCV_Opening2D_Constant_Padding/1                    0.217 ms        0.217 ms         3219
            -OpenCV_Closing2D_Constant_Padding/1                    0.217 ms        0.217 ms         3216
            -OpenCV_TopHat2D_Constant_Padding/1                     0.258 ms        0.258 ms         2710
            -OpenCV_BottomHat2D_Constant_Padding/1                  0.256 ms        0.256 ms         2740
            -OpenCV_MorphGrad2D_Constant_Padding/1                  0.250 ms        0.250 ms         2803
            -OpenCV_Dilate2D_Constant_Padding/1                     0.137 ms        0.137 ms         5105
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -
            \ No newline at end of file diff --git a/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html deleted file mode 100644 index 4228eca8..00000000 --- a/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ /dev/null @@ -1,95 +0,0 @@ - - - -

            imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:50:40 UTC

            -

            SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            - - - - - - - - - - - - - - - - - - - - - - -
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.894.89143
            MLIR_Conv2D/17.197.1997
            Buddy_Conv2D/10.5230.5231,334
            Buddy_Corr2D_Constant_Padding/10.7930.793882
            OpenCV_Filter2D_Constant_Padding/11.251.25561
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,847
            Buddy_Resize2D_Bilinear_Interpolation/10.260.262,679
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006680.00668104,555
            OpenCV_Resize2D_Bilinear_Interpolation/10.0140.01449,894
            Buddy_Erosion2D_Constant_Padding/10.2140.2143,270
            Buddy_Dilation2D_Constant_Padding/10.2140.2143,269
            Buddy_Opening2D_Constant_Padding/10.320.322,113
            Buddy_Closing2D_Constant_Padding/10.3060.3062,219
            Buddy_TopHat2D_Constant_Padding/10.7810.781863
            Buddy_BottomHat2D_Constant_Padding/10.7950.795836
            OpenCV_Erode2D_Constant_Padding/10.1350.1355,175
            OpenCV_Opening2D_Constant_Padding/10.2170.2173,222
            OpenCV_Closing2D_Constant_Padding/10.2220.2223,153
            OpenCV_TopHat2D_Constant_Padding/10.2570.2572,720
            OpenCV_BottomHat2D_Constant_Padding/10.2570.2572,721
            OpenCV_MorphGrad2D_Constant_Padding/10.250.252,805
            OpenCV_Dilate2D_Constant_Padding/10.1370.1375,121
            -
            Console output -
            2025-06-01T10:00:33+00:00
            -Running ./bin/image-processing-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.32, 1.67, 2.52
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ---------------------------------------------------------------------------------------------
            -Benchmark                                                  Time             CPU   Iterations
            ---------------------------------------------------------------------------------------------
            -Eigen_Convolve2D/1                                      4.89 ms         4.89 ms          143
            -MLIR_Conv2D/1                                           7.19 ms         7.19 ms           97
            -Buddy_Conv2D/1                                         0.523 ms        0.523 ms         1334
            -Buddy_Corr2D_Constant_Padding/1                        0.793 ms        0.793 ms          882
            -OpenCV_Filter2D_Constant_Padding/1                      1.25 ms         1.25 ms          561
            -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4847
            -Buddy_Resize2D_Bilinear_Interpolation/1                0.260 ms        0.260 ms         2679
            -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       104555
            -OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49894
            -Buddy_Erosion2D_Constant_Padding/1                     0.214 ms        0.214 ms         3270
            -Buddy_Dilation2D_Constant_Padding/1                    0.214 ms        0.214 ms         3269
            -Buddy_Opening2D_Constant_Padding/1                     0.320 ms        0.320 ms         2113
            -Buddy_Closing2D_Constant_Padding/1                     0.306 ms        0.306 ms         2219
            -Buddy_TopHat2D_Constant_Padding/1                      0.781 ms        0.781 ms          863
            -Buddy_BottomHat2D_Constant_Padding/1                   0.795 ms        0.795 ms          836
            -OpenCV_Erode2D_Constant_Padding/1                      0.135 ms        0.135 ms         5175
            -OpenCV_Opening2D_Constant_Padding/1                    0.217 ms        0.217 ms         3222
            -OpenCV_Closing2D_Constant_Padding/1                    0.222 ms        0.222 ms         3153
            -OpenCV_TopHat2D_Constant_Padding/1                     0.257 ms        0.257 ms         2720
            -OpenCV_BottomHat2D_Constant_Padding/1                  0.257 ms        0.257 ms         2721
            -OpenCV_MorphGrad2D_Constant_Padding/1                  0.250 ms        0.250 ms         2805
            -OpenCV_Dilate2D_Constant_Padding/1                     0.137 ms        0.137 ms         5121
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -
            \ No newline at end of file diff --git a/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html deleted file mode 100644 index 45487d1b..00000000 --- a/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ /dev/null @@ -1,95 +0,0 @@ - - - -

            imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:50:40 UTC

            -

            SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            - - - - - - - - - - - - - - - - - - - - - - -
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.874.87144
            MLIR_Conv2D/17.197.1897
            Buddy_Conv2D/10.5260.5261,322
            Buddy_Corr2D_Constant_Padding/10.7930.793885
            OpenCV_Filter2D_Constant_Padding/11.251.25560
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,865
            Buddy_Resize2D_Bilinear_Interpolation/10.260.262,697
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006660.00666105,155
            OpenCV_Resize2D_Bilinear_Interpolation/10.0140.01449,883
            Buddy_Erosion2D_Constant_Padding/10.2140.2143,259
            Buddy_Dilation2D_Constant_Padding/10.2130.2133,259
            Buddy_Opening2D_Constant_Padding/10.3150.3152,258
            Buddy_Closing2D_Constant_Padding/10.3120.3122,255
            Buddy_TopHat2D_Constant_Padding/10.7830.783854
            Buddy_BottomHat2D_Constant_Padding/10.7850.785821
            OpenCV_Erode2D_Constant_Padding/10.1370.1375,119
            OpenCV_Opening2D_Constant_Padding/10.2210.2213,167
            OpenCV_Closing2D_Constant_Padding/10.2230.2233,139
            OpenCV_TopHat2D_Constant_Padding/10.2580.2582,712
            OpenCV_BottomHat2D_Constant_Padding/10.2560.2562,733
            OpenCV_MorphGrad2D_Constant_Padding/10.2490.2492,804
            OpenCV_Dilate2D_Constant_Padding/10.1370.1375,118
            -
            Console output -
            2025-06-01T10:00:56+00:00
            -Running ./bin/image-processing-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.23, 1.63, 2.49
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ---------------------------------------------------------------------------------------------
            -Benchmark                                                  Time             CPU   Iterations
            ---------------------------------------------------------------------------------------------
            -Eigen_Convolve2D/1                                      4.87 ms         4.87 ms          144
            -MLIR_Conv2D/1                                           7.19 ms         7.18 ms           97
            -Buddy_Conv2D/1                                         0.526 ms        0.526 ms         1322
            -Buddy_Corr2D_Constant_Padding/1                        0.793 ms        0.793 ms          885
            -OpenCV_Filter2D_Constant_Padding/1                      1.25 ms         1.25 ms          560
            -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4865
            -Buddy_Resize2D_Bilinear_Interpolation/1                0.260 ms        0.260 ms         2697
            -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       105155
            -OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49883
            -Buddy_Erosion2D_Constant_Padding/1                     0.214 ms        0.214 ms         3259
            -Buddy_Dilation2D_Constant_Padding/1                    0.213 ms        0.213 ms         3259
            -Buddy_Opening2D_Constant_Padding/1                     0.315 ms        0.315 ms         2258
            -Buddy_Closing2D_Constant_Padding/1                     0.312 ms        0.312 ms         2255
            -Buddy_TopHat2D_Constant_Padding/1                      0.783 ms        0.783 ms          854
            -Buddy_BottomHat2D_Constant_Padding/1                   0.785 ms        0.785 ms          821
            -OpenCV_Erode2D_Constant_Padding/1                      0.137 ms        0.137 ms         5119
            -OpenCV_Opening2D_Constant_Padding/1                    0.221 ms        0.221 ms         3167
            -OpenCV_Closing2D_Constant_Padding/1                    0.223 ms        0.223 ms         3139
            -OpenCV_TopHat2D_Constant_Padding/1                     0.258 ms        0.258 ms         2712
            -OpenCV_BottomHat2D_Constant_Padding/1                  0.256 ms        0.256 ms         2733
            -OpenCV_MorphGrad2D_Constant_Padding/1                  0.249 ms        0.249 ms         2804
            -OpenCV_Dilate2D_Constant_Padding/1                     0.137 ms        0.137 ms         5118
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -
            \ No newline at end of file diff --git a/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html deleted file mode 100644 index c5c20603..00000000 --- a/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ /dev/null @@ -1,95 +0,0 @@ - - - -

            imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:50:40 UTC

            -

            SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            - - - - - - - - - - - - - - - - - - - - - - -
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.511.561
            MLIR_Conv2D/128.928.924
            Buddy_Conv2D/13.033.03231
            Buddy_Corr2D_Constant_Padding/12.312.31303
            OpenCV_Filter2D_Constant_Padding/14.114.11171
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1420.1424,848
            Buddy_Resize2D_Bilinear_Interpolation/10.2610.2612,686
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006660.00666105,211
            OpenCV_Resize2D_Bilinear_Interpolation/10.0140.01449,990
            Buddy_Erosion2D_Constant_Padding/10.2140.2143,269
            Buddy_Dilation2D_Constant_Padding/10.2150.2153,239
            Buddy_Opening2D_Constant_Padding/10.3140.3142,276
            Buddy_Closing2D_Constant_Padding/10.3080.3082,257
            Buddy_TopHat2D_Constant_Padding/10.8040.804840
            Buddy_BottomHat2D_Constant_Padding/10.7830.783836
            OpenCV_Erode2D_Constant_Padding/10.1360.1365,131
            OpenCV_Opening2D_Constant_Padding/10.2240.2243,120
            OpenCV_Closing2D_Constant_Padding/10.2270.2273,078
            OpenCV_TopHat2D_Constant_Padding/10.260.262,688
            OpenCV_BottomHat2D_Constant_Padding/10.2590.2592,702
            OpenCV_MorphGrad2D_Constant_Padding/10.2530.2532,764
            OpenCV_Dilate2D_Constant_Padding/10.1390.1395,022
            -
            Console output -
            2025-06-01T10:01:20+00:00
            -Running ./bin/image-processing-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.15, 1.58, 2.45
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ---------------------------------------------------------------------------------------------
            -Benchmark                                                  Time             CPU   Iterations
            ---------------------------------------------------------------------------------------------
            -Eigen_Convolve2D/1                                      11.5 ms         11.5 ms           61
            -MLIR_Conv2D/1                                           28.9 ms         28.9 ms           24
            -Buddy_Conv2D/1                                          3.03 ms         3.03 ms          231
            -Buddy_Corr2D_Constant_Padding/1                         2.31 ms         2.31 ms          303
            -OpenCV_Filter2D_Constant_Padding/1                      4.11 ms         4.11 ms          171
            -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.142 ms        0.142 ms         4848
            -Buddy_Resize2D_Bilinear_Interpolation/1                0.261 ms        0.261 ms         2686
            -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       105211
            -OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49990
            -Buddy_Erosion2D_Constant_Padding/1                     0.214 ms        0.214 ms         3269
            -Buddy_Dilation2D_Constant_Padding/1                    0.215 ms        0.215 ms         3239
            -Buddy_Opening2D_Constant_Padding/1                     0.314 ms        0.314 ms         2276
            -Buddy_Closing2D_Constant_Padding/1                     0.308 ms        0.308 ms         2257
            -Buddy_TopHat2D_Constant_Padding/1                      0.804 ms        0.804 ms          840
            -Buddy_BottomHat2D_Constant_Padding/1                   0.783 ms        0.783 ms          836
            -OpenCV_Erode2D_Constant_Padding/1                      0.136 ms        0.136 ms         5131
            -OpenCV_Opening2D_Constant_Padding/1                    0.224 ms        0.224 ms         3120
            -OpenCV_Closing2D_Constant_Padding/1                    0.227 ms        0.227 ms         3078
            -OpenCV_TopHat2D_Constant_Padding/1                     0.260 ms        0.260 ms         2688
            -OpenCV_BottomHat2D_Constant_Padding/1                  0.259 ms        0.259 ms         2702
            -OpenCV_MorphGrad2D_Constant_Padding/1                  0.253 ms        0.253 ms         2764
            -OpenCV_Dilate2D_Constant_Padding/1                     0.139 ms        0.139 ms         5022
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -
            \ No newline at end of file diff --git a/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html deleted file mode 100644 index 17e95ee7..00000000 --- a/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ /dev/null @@ -1,95 +0,0 @@ - - - -

            imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:50:40 UTC

            -

            SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            - - - - - - - - - - - - - - - - - - - - - - -
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.611.661
            MLIR_Conv2D/128.728.724
            Buddy_Conv2D/13.033.03231
            Buddy_Corr2D_Constant_Padding/12.312.31302
            OpenCV_Filter2D_Constant_Padding/14.14.1170
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1420.1424,835
            Buddy_Resize2D_Bilinear_Interpolation/10.260.262,693
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006680.00668104,962
            OpenCV_Resize2D_Bilinear_Interpolation/10.0140.01449,843
            Buddy_Erosion2D_Constant_Padding/10.2150.2153,259
            Buddy_Dilation2D_Constant_Padding/10.2140.2143,254
            Buddy_Opening2D_Constant_Padding/10.3070.3072,262
            Buddy_Closing2D_Constant_Padding/10.3190.3192,241
            Buddy_TopHat2D_Constant_Padding/10.7840.784851
            Buddy_BottomHat2D_Constant_Padding/10.7770.777840
            OpenCV_Erode2D_Constant_Padding/10.1350.1355,192
            OpenCV_Opening2D_Constant_Padding/10.2310.2313,030
            OpenCV_Closing2D_Constant_Padding/10.2290.2293,053
            OpenCV_TopHat2D_Constant_Padding/10.2680.2682,609
            OpenCV_BottomHat2D_Constant_Padding/10.2670.2672,624
            OpenCV_MorphGrad2D_Constant_Padding/10.2580.2582,714
            OpenCV_Dilate2D_Constant_Padding/10.1370.1375,119
            -
            Console output -
            2025-06-01T10:01:44+00:00
            -Running ./bin/image-processing-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.10, 1.53, 2.41
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ---------------------------------------------------------------------------------------------
            -Benchmark                                                  Time             CPU   Iterations
            ---------------------------------------------------------------------------------------------
            -Eigen_Convolve2D/1                                      11.6 ms         11.6 ms           61
            -MLIR_Conv2D/1                                           28.7 ms         28.7 ms           24
            -Buddy_Conv2D/1                                          3.03 ms         3.03 ms          231
            -Buddy_Corr2D_Constant_Padding/1                         2.31 ms         2.31 ms          302
            -OpenCV_Filter2D_Constant_Padding/1                      4.10 ms         4.10 ms          170
            -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.142 ms        0.142 ms         4835
            -Buddy_Resize2D_Bilinear_Interpolation/1                0.260 ms        0.260 ms         2693
            -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       104962
            -OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49843
            -Buddy_Erosion2D_Constant_Padding/1                     0.215 ms        0.215 ms         3259
            -Buddy_Dilation2D_Constant_Padding/1                    0.214 ms        0.214 ms         3254
            -Buddy_Opening2D_Constant_Padding/1                     0.307 ms        0.307 ms         2262
            -Buddy_Closing2D_Constant_Padding/1                     0.319 ms        0.319 ms         2241
            -Buddy_TopHat2D_Constant_Padding/1                      0.784 ms        0.784 ms          851
            -Buddy_BottomHat2D_Constant_Padding/1                   0.777 ms        0.777 ms          840
            -OpenCV_Erode2D_Constant_Padding/1                      0.135 ms        0.135 ms         5192
            -OpenCV_Opening2D_Constant_Padding/1                    0.231 ms        0.231 ms         3030
            -OpenCV_Closing2D_Constant_Padding/1                    0.229 ms        0.229 ms         3053
            -OpenCV_TopHat2D_Constant_Padding/1                     0.268 ms        0.268 ms         2609
            -OpenCV_BottomHat2D_Constant_Padding/1                  0.267 ms        0.267 ms         2624
            -OpenCV_MorphGrad2D_Constant_Padding/1                  0.258 ms        0.258 ms         2714
            -OpenCV_Dilate2D_Constant_Padding/1                     0.137 ms        0.137 ms         5119
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -
            \ No newline at end of file diff --git a/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html deleted file mode 100644 index 6755990d..00000000 --- a/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ /dev/null @@ -1,95 +0,0 @@ - - - -

            imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:50:40 UTC

            -

            SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            - - - - - - - - - - - - - - - - - - - - - - -
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/121.621.631
            MLIR_Conv2D/166.766.711
            Buddy_Conv2D/16.126.12114
            Buddy_Corr2D_Constant_Padding/14.654.65151
            OpenCV_Filter2D_Constant_Padding/18.68.681
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,847
            Buddy_Resize2D_Bilinear_Interpolation/10.2610.2612,686
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006660.00666105,138
            OpenCV_Resize2D_Bilinear_Interpolation/10.0140.01449,889
            Buddy_Erosion2D_Constant_Padding/10.2130.2133,257
            Buddy_Dilation2D_Constant_Padding/10.2130.2133,242
            Buddy_Opening2D_Constant_Padding/10.3130.3132,237
            Buddy_Closing2D_Constant_Padding/10.3180.3182,232
            Buddy_TopHat2D_Constant_Padding/10.7750.775853
            Buddy_BottomHat2D_Constant_Padding/10.7880.788846
            OpenCV_Erode2D_Constant_Padding/10.1380.1385,075
            OpenCV_Opening2D_Constant_Padding/10.2320.2323,015
            OpenCV_Closing2D_Constant_Padding/10.2250.2253,114
            OpenCV_TopHat2D_Constant_Padding/10.2640.2642,647
            OpenCV_BottomHat2D_Constant_Padding/10.2620.2622,672
            OpenCV_MorphGrad2D_Constant_Padding/10.2550.2552,749
            OpenCV_Dilate2D_Constant_Padding/10.1370.1375,111
            -
            Console output -
            2025-06-01T10:02:08+00:00
            -Running ./bin/image-processing-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.06, 1.49, 2.37
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ---------------------------------------------------------------------------------------------
            -Benchmark                                                  Time             CPU   Iterations
            ---------------------------------------------------------------------------------------------
            -Eigen_Convolve2D/1                                      21.6 ms         21.6 ms           31
            -MLIR_Conv2D/1                                           66.7 ms         66.7 ms           11
            -Buddy_Conv2D/1                                          6.12 ms         6.12 ms          114
            -Buddy_Corr2D_Constant_Padding/1                         4.65 ms         4.65 ms          151
            -OpenCV_Filter2D_Constant_Padding/1                      8.60 ms         8.60 ms           81
            -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4847
            -Buddy_Resize2D_Bilinear_Interpolation/1                0.261 ms        0.261 ms         2686
            -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       105138
            -OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49889
            -Buddy_Erosion2D_Constant_Padding/1                     0.213 ms        0.213 ms         3257
            -Buddy_Dilation2D_Constant_Padding/1                    0.213 ms        0.213 ms         3242
            -Buddy_Opening2D_Constant_Padding/1                     0.313 ms        0.313 ms         2237
            -Buddy_Closing2D_Constant_Padding/1                     0.318 ms        0.318 ms         2232
            -Buddy_TopHat2D_Constant_Padding/1                      0.775 ms        0.775 ms          853
            -Buddy_BottomHat2D_Constant_Padding/1                   0.788 ms        0.788 ms          846
            -OpenCV_Erode2D_Constant_Padding/1                      0.138 ms        0.138 ms         5075
            -OpenCV_Opening2D_Constant_Padding/1                    0.232 ms        0.232 ms         3015
            -OpenCV_Closing2D_Constant_Padding/1                    0.225 ms        0.225 ms         3114
            -OpenCV_TopHat2D_Constant_Padding/1                     0.264 ms        0.264 ms         2647
            -OpenCV_BottomHat2D_Constant_Padding/1                  0.262 ms        0.262 ms         2672
            -OpenCV_MorphGrad2D_Constant_Padding/1                  0.255 ms        0.255 ms         2749
            -OpenCV_Dilate2D_Constant_Padding/1                     0.137 ms        0.137 ms         5111
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -
            \ No newline at end of file diff --git a/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html deleted file mode 100644 index 22c23acf..00000000 --- a/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ /dev/null @@ -1,95 +0,0 @@ - - - -

            imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:50:40 UTC

            -

            SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            - - - - - - - - - - - - - - - - - - - - - - -
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/121.521.532
            MLIR_Conv2D/166.666.611
            Buddy_Conv2D/16.136.13114
            Buddy_Corr2D_Constant_Padding/14.654.65151
            OpenCV_Filter2D_Constant_Padding/18.68.681
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,857
            Buddy_Resize2D_Bilinear_Interpolation/10.260.262,693
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006650.00665105,362
            OpenCV_Resize2D_Bilinear_Interpolation/10.0140.01449,959
            Buddy_Erosion2D_Constant_Padding/10.2140.2143,220
            Buddy_Dilation2D_Constant_Padding/10.2150.2153,263
            Buddy_Opening2D_Constant_Padding/10.310.312,246
            Buddy_Closing2D_Constant_Padding/10.310.312,217
            Buddy_TopHat2D_Constant_Padding/10.7780.778828
            Buddy_BottomHat2D_Constant_Padding/10.7930.793833
            OpenCV_Erode2D_Constant_Padding/10.1360.1365,129
            OpenCV_Opening2D_Constant_Padding/10.2260.2263,091
            OpenCV_Closing2D_Constant_Padding/10.2260.2263,096
            OpenCV_TopHat2D_Constant_Padding/10.2610.2612,677
            OpenCV_BottomHat2D_Constant_Padding/10.2610.2612,684
            OpenCV_MorphGrad2D_Constant_Padding/10.2530.2532,763
            OpenCV_Dilate2D_Constant_Padding/10.1360.1365,132
            -
            Console output -
            2025-06-01T10:02:32+00:00
            -Running ./bin/image-processing-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.04, 1.45, 2.33
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ---------------------------------------------------------------------------------------------
            -Benchmark                                                  Time             CPU   Iterations
            ---------------------------------------------------------------------------------------------
            -Eigen_Convolve2D/1                                      21.5 ms         21.5 ms           32
            -MLIR_Conv2D/1                                           66.6 ms         66.6 ms           11
            -Buddy_Conv2D/1                                          6.13 ms         6.13 ms          114
            -Buddy_Corr2D_Constant_Padding/1                         4.65 ms         4.65 ms          151
            -OpenCV_Filter2D_Constant_Padding/1                      8.60 ms         8.60 ms           81
            -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4857
            -Buddy_Resize2D_Bilinear_Interpolation/1                0.260 ms        0.260 ms         2693
            -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       105362
            -OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49959
            -Buddy_Erosion2D_Constant_Padding/1                     0.214 ms        0.214 ms         3220
            -Buddy_Dilation2D_Constant_Padding/1                    0.215 ms        0.215 ms         3263
            -Buddy_Opening2D_Constant_Padding/1                     0.310 ms        0.310 ms         2246
            -Buddy_Closing2D_Constant_Padding/1                     0.310 ms        0.310 ms         2217
            -Buddy_TopHat2D_Constant_Padding/1                      0.778 ms        0.778 ms          828
            -Buddy_BottomHat2D_Constant_Padding/1                   0.793 ms        0.793 ms          833
            -OpenCV_Erode2D_Constant_Padding/1                      0.136 ms        0.136 ms         5129
            -OpenCV_Opening2D_Constant_Padding/1                    0.226 ms        0.226 ms         3091
            -OpenCV_Closing2D_Constant_Padding/1                    0.226 ms        0.226 ms         3096
            -OpenCV_TopHat2D_Constant_Padding/1                     0.261 ms        0.261 ms         2677
            -OpenCV_BottomHat2D_Constant_Padding/1                  0.261 ms        0.261 ms         2684
            -OpenCV_MorphGrad2D_Constant_Padding/1                  0.253 ms        0.253 ms         2763
            -OpenCV_Dilate2D_Constant_Padding/1                     0.136 ms        0.136 ms         5132
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -
            \ No newline at end of file diff --git a/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html deleted file mode 100644 index 77e47d45..00000000 --- a/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ /dev/null @@ -1,95 +0,0 @@ - - - -

            imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 16:50:40 UTC

            -

            SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            - - - - - - - - - - - - - - - - - - - - - - -
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/141.541.517
            MLIR_Conv2D/11441445
            Buddy_Conv2D/110.510.567
            Buddy_Corr2D_Constant_Padding/17.957.9590
            OpenCV_Filter2D_Constant_Padding/15.895.89120
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,856
            Buddy_Resize2D_Bilinear_Interpolation/10.2610.2612,692
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006660.00666105,165
            OpenCV_Resize2D_Bilinear_Interpolation/10.01420.014249,405
            Buddy_Erosion2D_Constant_Padding/10.2140.2143,218
            Buddy_Dilation2D_Constant_Padding/10.2150.2153,226
            Buddy_Opening2D_Constant_Padding/10.3120.3122,175
            Buddy_Closing2D_Constant_Padding/10.3120.3122,264
            Buddy_TopHat2D_Constant_Padding/10.8210.821843
            Buddy_BottomHat2D_Constant_Padding/10.8180.818844
            OpenCV_Erode2D_Constant_Padding/10.1360.1365,142
            OpenCV_Opening2D_Constant_Padding/10.2210.2213,174
            OpenCV_Closing2D_Constant_Padding/10.2210.2213,164
            OpenCV_TopHat2D_Constant_Padding/10.2560.2562,735
            OpenCV_BottomHat2D_Constant_Padding/10.2580.2582,710
            OpenCV_MorphGrad2D_Constant_Padding/10.2510.2512,787
            OpenCV_Dilate2D_Constant_Padding/10.1370.1375,117
            -
            Console output -
            2025-06-01T10:02:56+00:00
            -Running ./bin/image-processing-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.03, 1.42, 2.30
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ---------------------------------------------------------------------------------------------
            -Benchmark                                                  Time             CPU   Iterations
            ---------------------------------------------------------------------------------------------
            -Eigen_Convolve2D/1                                      41.5 ms         41.5 ms           17
            -MLIR_Conv2D/1                                            144 ms          144 ms            5
            -Buddy_Conv2D/1                                          10.5 ms         10.5 ms           67
            -Buddy_Corr2D_Constant_Padding/1                         7.95 ms         7.95 ms           90
            -OpenCV_Filter2D_Constant_Padding/1                      5.89 ms         5.89 ms          120
            -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4856
            -Buddy_Resize2D_Bilinear_Interpolation/1                0.261 ms        0.261 ms         2692
            -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       105165
            -OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49405
            -Buddy_Erosion2D_Constant_Padding/1                     0.214 ms        0.214 ms         3218
            -Buddy_Dilation2D_Constant_Padding/1                    0.215 ms        0.215 ms         3226
            -Buddy_Opening2D_Constant_Padding/1                     0.312 ms        0.312 ms         2175
            -Buddy_Closing2D_Constant_Padding/1                     0.312 ms        0.312 ms         2264
            -Buddy_TopHat2D_Constant_Padding/1                      0.821 ms        0.821 ms          843
            -Buddy_BottomHat2D_Constant_Padding/1                   0.818 ms        0.818 ms          844
            -OpenCV_Erode2D_Constant_Padding/1                      0.136 ms        0.136 ms         5142
            -OpenCV_Opening2D_Constant_Padding/1                    0.221 ms        0.221 ms         3174
            -OpenCV_Closing2D_Constant_Padding/1                    0.221 ms        0.221 ms         3164
            -OpenCV_TopHat2D_Constant_Padding/1                     0.256 ms        0.256 ms         2735
            -OpenCV_BottomHat2D_Constant_Padding/1                  0.258 ms        0.258 ms         2710
            -OpenCV_MorphGrad2D_Constant_Padding/1                  0.251 ms        0.251 ms         2787
            -OpenCV_Dilate2D_Constant_Padding/1                     0.137 ms        0.137 ms         5117
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -
            \ No newline at end of file diff --git a/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html deleted file mode 100644 index f03bcec8..00000000 --- a/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ /dev/null @@ -1,95 +0,0 @@ - - - -

            imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 16:50:40 UTC

            -

            SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            - - - - - - - - - - - - - - - - - - - - - - -
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/134.234.220
            MLIR_Conv2D/11191196
            Buddy_Conv2D/110.510.567
            Buddy_Corr2D_Constant_Padding/17.897.8990
            OpenCV_Filter2D_Constant_Padding/15.895.89119
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,857
            Buddy_Resize2D_Bilinear_Interpolation/10.260.262,690
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006650.00665105,068
            OpenCV_Resize2D_Bilinear_Interpolation/10.01420.014249,449
            Buddy_Erosion2D_Constant_Padding/10.2140.2143,244
            Buddy_Dilation2D_Constant_Padding/10.2250.2253,243
            Buddy_Opening2D_Constant_Padding/10.3070.3072,260
            Buddy_Closing2D_Constant_Padding/10.3130.3132,223
            Buddy_TopHat2D_Constant_Padding/10.8180.818827
            Buddy_BottomHat2D_Constant_Padding/10.7970.796861
            OpenCV_Erode2D_Constant_Padding/10.1370.1375,101
            OpenCV_Opening2D_Constant_Padding/10.2190.2193,187
            OpenCV_Closing2D_Constant_Padding/10.2220.2223,142
            OpenCV_TopHat2D_Constant_Padding/10.2560.2562,731
            OpenCV_BottomHat2D_Constant_Padding/10.2550.2552,740
            OpenCV_MorphGrad2D_Constant_Padding/10.2490.2492,815
            OpenCV_Dilate2D_Constant_Padding/10.1350.1355,206
            -
            Console output -
            2025-06-01T10:03:20+00:00
            -Running ./bin/image-processing-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.02, 1.38, 2.27
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ---------------------------------------------------------------------------------------------
            -Benchmark                                                  Time             CPU   Iterations
            ---------------------------------------------------------------------------------------------
            -Eigen_Convolve2D/1                                      34.2 ms         34.2 ms           20
            -MLIR_Conv2D/1                                            119 ms          119 ms            6
            -Buddy_Conv2D/1                                          10.5 ms         10.5 ms           67
            -Buddy_Corr2D_Constant_Padding/1                         7.89 ms         7.89 ms           90
            -OpenCV_Filter2D_Constant_Padding/1                      5.89 ms         5.89 ms          119
            -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4857
            -Buddy_Resize2D_Bilinear_Interpolation/1                0.260 ms        0.260 ms         2690
            -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       105068
            -OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49449
            -Buddy_Erosion2D_Constant_Padding/1                     0.214 ms        0.214 ms         3244
            -Buddy_Dilation2D_Constant_Padding/1                    0.225 ms        0.225 ms         3243
            -Buddy_Opening2D_Constant_Padding/1                     0.307 ms        0.307 ms         2260
            -Buddy_Closing2D_Constant_Padding/1                     0.313 ms        0.313 ms         2223
            -Buddy_TopHat2D_Constant_Padding/1                      0.818 ms        0.818 ms          827
            -Buddy_BottomHat2D_Constant_Padding/1                   0.797 ms        0.796 ms          861
            -OpenCV_Erode2D_Constant_Padding/1                      0.137 ms        0.137 ms         5101
            -OpenCV_Opening2D_Constant_Padding/1                    0.219 ms        0.219 ms         3187
            -OpenCV_Closing2D_Constant_Padding/1                    0.222 ms        0.222 ms         3142
            -OpenCV_TopHat2D_Constant_Padding/1                     0.256 ms        0.256 ms         2731
            -OpenCV_BottomHat2D_Constant_Padding/1                  0.255 ms        0.255 ms         2740
            -OpenCV_MorphGrad2D_Constant_Padding/1                  0.249 ms        0.249 ms         2815
            -OpenCV_Dilate2D_Constant_Padding/1                     0.135 ms        0.135 ms         5206
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -
            \ No newline at end of file diff --git a/site/vectorization/vectorization_matrix.html b/site/vectorization/vectorization_matrix.html deleted file mode 100644 index f32e27e5..00000000 --- a/site/vectorization/vectorization_matrix.html +++ /dev/null @@ -1,40 +0,0 @@ - - - -

            vectorization/vectorization_matrix.json

            2025-07-27 16:50:40 UTC

            -

            vectorization_matrix.json

            - - -
            NameTime (ns)CPU (ns)Iterations
            MLIR_MatMul/118.818.837,302,822
            MLIR_MatVec/120.520.535,030,976
            -
            Console output -
            2025-06-01T10:11:11+00:00
            -Running ./vectorization-matrix-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.00, 1.09, 1.76
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ---------------------------------------------------------
            -Benchmark              Time             CPU   Iterations
            ---------------------------------------------------------
            -MLIR_MatMul/1       18.8 ns         18.8 ns     37302822
            -MLIR_MatVec/1       20.5 ns         20.5 ns     35030976
            ---------------------------------------------------------
            -MLIR_MatMul: MLIR MatMul Operation + Nested Loop
            -[ 18 18 18 18 18 18 18 18 18 18 ]
            ---------------------------------------------------------
            -MLIR_MatVec: MLIR MatVec Operation
            -[ 18 18 18 18 18 18 18 18 18 18 ]
            -
            \ No newline at end of file From cd72f2e6c3ff1f826032e7389ef221ea450c23e7 Mon Sep 17 00:00:00 2001 From: LIUQyou <1343451020@qq.com> Date: Sun, 27 Jul 2025 17:18:59 +0000 Subject: [PATCH 36/52] test --- .github/workflows/bench.yml | 2 +- .../benchmarks/2025-07-27/index.html | 14 +++ .../deeplearning/dl-layer-ffn-benchmark.html | 37 ++++++++ .../dl-layer-rmsnorm-benchmark.html | 37 ++++++++ .../dl-layer-selfattention-benchmark.html | 37 ++++++++ .../dl-model-lenet-benchmark.html | 38 ++++++++ .../dl-model-mobilenetv3-benchmark.html | 38 ++++++++ .../dl-model-resnet18-benchmark.html | 37 ++++++++ .../dl-model-tinyllama-benchmark.html | 39 ++++++++ .../dl-model-whisper-benchmark.html | 38 ++++++++ .../dl-op-linalg-arithaddf-benchmark.html | 38 ++++++++ .../dl-op-linalg-arithdivf-benchmark.html | 38 ++++++++ .../dl-op-linalg-arithmulf-benchmark.html | 38 ++++++++ .../dl-op-linalg-arithnegf-benchmark.html | 38 ++++++++ .../dl-op-linalg-arithsubf-benchmark.html | 38 ++++++++ .../dl-op-linalg-batch-matmul-benchmark.html | 49 ++++++++++ ...-op-linalg-conv2d-nchw-fchw-benchmark.html | 38 ++++++++ ...-op-linalg-conv2d-nhwc-fhwc-benchmark.html | 42 ++++++++ ...-op-linalg-conv2d-nhwc-hwcf-benchmark.html | 38 ++++++++ ...-depthwise-conv-2d-nhwc-hwc-benchmark.html | 39 ++++++++ .../dl-op-linalg-mathexp-benchmark.html | 38 ++++++++ .../dl-op-linalg-mathfpow-benchmark.html | 38 ++++++++ .../dl-op-linalg-mathrsqrt-benchmark.html | 38 ++++++++ .../dl-op-linalg-matmul-benchmark.html | 44 +++++++++ ...-op-linalg-pooling-nhwc-sum-benchmark.html | 38 ++++++++ .../dl-op-linalg-reduceaddf-benchmark.html | 26 +++++ .../dl-op-linalg-reducemaxf-benchmark.html | 26 +++++ ...-linalg-softmax-exp-sum-div-benchmark.html | 38 ++++++++ .../dl-op-matmul-transpose-b-benchmark.html | 42 ++++++++ .../dl-op-tosa-transpose-benchmark.html | 36 +++++++ ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 95 +++++++++++++++++++ ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 95 +++++++++++++++++++ 
...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 95 +++++++++++++++++++ ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 95 +++++++++++++++++++ ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 95 +++++++++++++++++++ ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 95 +++++++++++++++++++ ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 95 +++++++++++++++++++ ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 95 +++++++++++++++++++ ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 95 +++++++++++++++++++ ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 95 +++++++++++++++++++ ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 95 +++++++++++++++++++ ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 95 +++++++++++++++++++ ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 95 +++++++++++++++++++ ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 95 +++++++++++++++++++ ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 95 +++++++++++++++++++ ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 95 +++++++++++++++++++ ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 95 +++++++++++++++++++ ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 95 +++++++++++++++++++ ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 95 +++++++++++++++++++ ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 95 +++++++++++++++++++ ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 95 +++++++++++++++++++ ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 95 +++++++++++++++++++ ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 95 +++++++++++++++++++ ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 95 +++++++++++++++++++ ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 95 +++++++++++++++++++ ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 95 +++++++++++++++++++ ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 95 +++++++++++++++++++ ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 95 +++++++++++++++++++ site/benchmarks/2025-07-27/index.html | 14 +++ .../vectorization/vectorization_matrix.html | 40 ++++++++ site/benchmarks/latest/index.html | 1 + 61 files 
changed, 3791 insertions(+), 1 deletion(-) create mode 100644 site/benchmarks/2025-07-27/benchmarks/2025-07-27/index.html create mode 100644 site/benchmarks/2025-07-27/deeplearning/dl-layer-ffn-benchmark.html create mode 100644 site/benchmarks/2025-07-27/deeplearning/dl-layer-rmsnorm-benchmark.html create mode 100644 site/benchmarks/2025-07-27/deeplearning/dl-layer-selfattention-benchmark.html create mode 100644 site/benchmarks/2025-07-27/deeplearning/dl-model-lenet-benchmark.html create mode 100644 site/benchmarks/2025-07-27/deeplearning/dl-model-mobilenetv3-benchmark.html create mode 100644 site/benchmarks/2025-07-27/deeplearning/dl-model-resnet18-benchmark.html create mode 100644 site/benchmarks/2025-07-27/deeplearning/dl-model-tinyllama-benchmark.html create mode 100644 site/benchmarks/2025-07-27/deeplearning/dl-model-whisper-benchmark.html create mode 100644 site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithaddf-benchmark.html create mode 100644 site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithdivf-benchmark.html create mode 100644 site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithmulf-benchmark.html create mode 100644 site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithnegf-benchmark.html create mode 100644 site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithsubf-benchmark.html create mode 100644 site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-batch-matmul-benchmark.html create mode 100644 site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html create mode 100644 site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html create mode 100644 site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html create mode 100644 site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html create mode 100644 site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-mathexp-benchmark.html create mode 100644 
site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-mathfpow-benchmark.html create mode 100644 site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html create mode 100644 site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-matmul-benchmark.html create mode 100644 site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html create mode 100644 site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-reduceaddf-benchmark.html create mode 100644 site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-reducemaxf-benchmark.html create mode 100644 site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html create mode 100644 site/benchmarks/2025-07-27/deeplearning/dl-op-matmul-transpose-b-benchmark.html create mode 100644 site/benchmarks/2025-07-27/deeplearning/dl-op-tosa-transpose-benchmark.html create mode 100644 site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html create mode 100644 site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html create mode 100644 site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html create mode 100644 site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html create mode 100644 site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html create mode 100644 site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html create mode 100644 site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html create mode 100644 site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html create mode 100644 
site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html create mode 100644 site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html create mode 100644 site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html create mode 100644 site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html create mode 100644 site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html create mode 100644 site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html create mode 100644 site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html create mode 100644 site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html create mode 100644 site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html create mode 100644 site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html create mode 100644 site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html create mode 100644 site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html create mode 100644 site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html create mode 100644 site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html create mode 100644 
site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html create mode 100644 site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html create mode 100644 site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html create mode 100644 site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html create mode 100644 site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html create mode 100644 site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html create mode 100644 site/benchmarks/2025-07-27/index.html create mode 100644 site/benchmarks/2025-07-27/vectorization/vectorization_matrix.html create mode 100644 site/benchmarks/latest/index.html diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml index e00dbe5c..e2f23291 100755 --- a/.github/workflows/bench.yml +++ b/.github/workflows/bench.yml @@ -110,7 +110,7 @@ jobs: with: personal_token: ${{ secrets.BUDDY_SITE_PAT }} external_repository: buddy-compiler/buddy-compiler.github.io - publish_dir: "${{ env.BENCH_DIR }}" # this now holds index.html + reports + publish_dir: "${{ env.BENCH_DIR }}" destination_dir: benchmarks/${{ github.sha }} publish_branch: master keep_files: true diff --git a/site/benchmarks/2025-07-27/benchmarks/2025-07-27/index.html b/site/benchmarks/2025-07-27/benchmarks/2025-07-27/index.html new file mode 100644 index 00000000..1641d47e --- /dev/null +++ b/site/benchmarks/2025-07-27/benchmarks/2025-07-27/index.html @@ -0,0 +1,14 @@ + + +

            Buddy-Benchmark results

              + +
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-layer-ffn-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-layer-ffn-benchmark.html new file mode 100644 index 00000000..4014096d --- /dev/null +++ b/site/benchmarks/2025-07-27/deeplearning/dl-layer-ffn-benchmark.html @@ -0,0 +1,37 @@ + + + +

            deeplearning/dl-layer-ffn-benchmark.json

            2025-07-27 17:05:43 UTC

            +

            dl-layer-ffn-benchmark.json

            + + +
            NameTime (ms)CPU (ms)Iterations
            DL_LAYER_FFN/Scalar0.06540.065410,762
            DL_LAYER_FFN/Auto_Vectorization0.02710.027125,673
            +
            Console output +
            2025-07-27T14:26:49+00:00
            +Running ./dl-layer-ffn-benchmark
            +Run on (24 X 5100 MHz CPU s)
            +CPU Caches:
            +  L1 Data 48 KiB (x12)
            +  L1 Instruction 32 KiB (x12)
            +  L2 Unified 1280 KiB (x12)
            +  L3 Unified 30720 KiB (x1)
            +Load Average: 1.04, 1.19, 1.31
            +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            +--------------------------------------------------------------------------
            +Benchmark                                Time             CPU   Iterations
            +--------------------------------------------------------------------------
            +DL_LAYER_FFN/Scalar                  0.065 ms        0.065 ms        10762
            +DL_LAYER_FFN/Auto_Vectorization      0.027 ms        0.027 ms        25673
            +-----------------------------------------------------------
            +Correctness Verification: PASS
            +-----------------------------------------------------------
            +
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-layer-rmsnorm-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-layer-rmsnorm-benchmark.html new file mode 100644 index 00000000..4902feb5 --- /dev/null +++ b/site/benchmarks/2025-07-27/deeplearning/dl-layer-rmsnorm-benchmark.html @@ -0,0 +1,37 @@ + + + +

            deeplearning/dl-layer-rmsnorm-benchmark.json

            2025-07-27 17:05:43 UTC

            +

            dl-layer-rmsnorm-benchmark.json

            + + +
            NameTime (ms)CPU (ms)Iterations
            DL_LAYER_RMSNORM/Scalar0.001960.00196356,202
            DL_LAYER_RMSNORM/Auto_Vectorization0.0009150.000915751,546
            +
            Console output +
            2025-07-27T14:26:53+00:00
            +Running ./dl-layer-rmsnorm-benchmark
            +Run on (24 X 5100 MHz CPU s)
            +CPU Caches:
            +  L1 Data 48 KiB (x12)
            +  L1 Instruction 32 KiB (x12)
            +  L2 Unified 1280 KiB (x12)
            +  L3 Unified 30720 KiB (x1)
            +Load Average: 1.03, 1.19, 1.30
            +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            +------------------------------------------------------------------------------
            +Benchmark                                    Time             CPU   Iterations
            +------------------------------------------------------------------------------
            +DL_LAYER_RMSNORM/Scalar                  0.002 ms        0.002 ms       356202
            +DL_LAYER_RMSNORM/Auto_Vectorization      0.001 ms        0.001 ms       751546
            +-----------------------------------------------------------
            +Correctness Verification: PASS
            +-----------------------------------------------------------
            +
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-layer-selfattention-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-layer-selfattention-benchmark.html new file mode 100644 index 00000000..65b08cd1 --- /dev/null +++ b/site/benchmarks/2025-07-27/deeplearning/dl-layer-selfattention-benchmark.html @@ -0,0 +1,37 @@ + + + +

            deeplearning/dl-layer-selfattention-benchmark.json

            2025-07-27 17:05:43 UTC

            +

            dl-layer-selfattention-benchmark.json

            + + +
            NameTime (ms)CPU (ms)Iterations
            DL_LAYER_ATTENTION/Scalar4.694.69149
            DL_LAYER_ATTENTION/Auto_Vectorization1.571.57446
            +
            Console output +
            2025-07-27T14:26:51+00:00
            +Running ./dl-layer-selfattention-benchmark
            +Run on (24 X 5100 MHz CPU s)
            +CPU Caches:
            +  L1 Data 48 KiB (x12)
            +  L1 Instruction 32 KiB (x12)
            +  L2 Unified 1280 KiB (x12)
            +  L3 Unified 30720 KiB (x1)
            +Load Average: 1.04, 1.19, 1.31
            +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            +--------------------------------------------------------------------------------
            +Benchmark                                      Time             CPU   Iterations
            +--------------------------------------------------------------------------------
            +DL_LAYER_ATTENTION/Scalar                   4.69 ms         4.69 ms          149
            +DL_LAYER_ATTENTION/Auto_Vectorization       1.57 ms         1.57 ms          446
            +-----------------------------------------------------------
            +Correctness Verification: PASS
            +-----------------------------------------------------------
            +
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-model-lenet-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-model-lenet-benchmark.html new file mode 100644 index 00000000..3a412d45 --- /dev/null +++ b/site/benchmarks/2025-07-27/deeplearning/dl-model-lenet-benchmark.html @@ -0,0 +1,38 @@ + + + +

            deeplearning/dl-model-lenet-benchmark.json

            2025-07-27 17:05:43 UTC

            +

            dl-model-lenet-benchmark.json

            + + +
            NameTime (ms)CPU (ms)Iterations
            DL_MODEL_LENET/Auto_Vectorization0.1650.1654,304
            DL_MODEL_LENET/Buddy_Vectorization0.1370.1375,022
            +
            Console output +
            2025-07-27T14:22:52+00:00
            +Running ./dl-model-lenet-benchmark
            +Run on (24 X 5100 MHz CPU s)
            +CPU Caches:
            +  L1 Data 48 KiB (x12)
            +  L1 Instruction 32 KiB (x12)
            +  L2 Unified 1280 KiB (x12)
            +  L3 Unified 30720 KiB (x1)
            +Load Average: 1.40, 1.39, 1.40
            +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            +-----------------------------------------------------------------------------
            +Benchmark                                   Time             CPU   Iterations
            +-----------------------------------------------------------------------------
            +DL_MODEL_LENET/Auto_Vectorization       0.165 ms        0.165 ms         4304
            +DL_MODEL_LENET/Buddy_Vectorization      0.137 ms        0.137 ms         5022
            +-----------------------------------------------------------
            +Correctness Verification:
            +Transform case: FAIL
            +-----------------------------------------------------------
            +
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-model-mobilenetv3-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-model-mobilenetv3-benchmark.html new file mode 100644 index 00000000..153f70be --- /dev/null +++ b/site/benchmarks/2025-07-27/deeplearning/dl-model-mobilenetv3-benchmark.html @@ -0,0 +1,38 @@ + + + +

            deeplearning/dl-model-mobilenetv3-benchmark.json

            2025-07-27 17:05:43 UTC

            +

            dl-model-mobilenetv3-benchmark.json

            + + +
            NameTime (ms)CPU (ms)Iterations
            BM_MobileNet_V3/BM_MobileNet_V3_scalar37.137.119
            BM_MobileNet_V3/BM_MobileNet_V3_conv_opt333321
            +
            Console output +
            2025-07-27T14:22:49+00:00
            +Running ./dl-model-mobilenetv3-benchmark
            +Run on (24 X 5100 MHz CPU s)
            +CPU Caches:
            +  L1 Data 48 KiB (x12)
            +  L1 Instruction 32 KiB (x12)
            +  L2 Unified 1280 KiB (x12)
            +  L3 Unified 30720 KiB (x1)
            +Load Average: 1.40, 1.39, 1.40
            +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            +-----------------------------------------------------------------------------------
            +Benchmark                                         Time             CPU   Iterations
            +-----------------------------------------------------------------------------------
            +BM_MobileNet_V3/BM_MobileNet_V3_scalar         37.1 ms         37.1 ms           19
            +BM_MobileNet_V3/BM_MobileNet_V3_conv_opt       33.0 ms         33.0 ms           21
            +-----------------------------------------------------------
            +Correctness Verification:
            +Transform case: PASS
            +-----------------------------------------------------------
            +
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-model-resnet18-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-model-resnet18-benchmark.html new file mode 100644 index 00000000..e3f9ddf5 --- /dev/null +++ b/site/benchmarks/2025-07-27/deeplearning/dl-model-resnet18-benchmark.html @@ -0,0 +1,37 @@ + + + +

            deeplearning/dl-model-resnet18-benchmark.json

            2025-07-27 17:05:43 UTC

            +

            dl-model-resnet18-benchmark.json

            + + +
            NameTime (ms)CPU (ms)Iterations
            DL_MODEL_Resnet18/Auto_Vectorization7317231
            DL_MODEL_Resnet18/Buddy_Vectorization7297221
            +
            Console output +
            2025-07-27T14:26:46+00:00
            +Running ./dl-model-resnet18-benchmark
            +Run on (24 X 5100 MHz CPU s)
            +CPU Caches:
            +  L1 Data 48 KiB (x12)
            +  L1 Instruction 32 KiB (x12)
            +  L2 Unified 1280 KiB (x12)
            +  L3 Unified 30720 KiB (x1)
            +Load Average: 1.04, 1.19, 1.31
            +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            +--------------------------------------------------------------------------------
            +Benchmark                                      Time             CPU   Iterations
            +--------------------------------------------------------------------------------
            +DL_MODEL_Resnet18/Auto_Vectorization         731 ms          723 ms            1
            +DL_MODEL_Resnet18/Buddy_Vectorization        729 ms          722 ms            1
            +-----------------------------------------------------------
            +Correctness Verification: PASS
            +-----------------------------------------------------------
            +
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-model-tinyllama-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-model-tinyllama-benchmark.html new file mode 100644 index 00000000..8c7fa324 --- /dev/null +++ b/site/benchmarks/2025-07-27/deeplearning/dl-model-tinyllama-benchmark.html @@ -0,0 +1,39 @@ + + + +

            deeplearning/dl-model-tinyllama-benchmark.json

            2025-07-27 17:05:43 UTC

            +

            dl-model-tinyllama-benchmark.json

            + + + +
            NameTime (ms)CPU (ms)Iterations
            DL_MODEL_TINYLLAMA/scalar1.39e+051.39e+051
            DL_MODEL_TINYLLAMA/matmul_opt1e+041e+041
            DL_MODEL_TINYLLAMA/matmul_opt_omp7.84e+037.2e+031
            +
            Console output +
            2025-07-27T14:17:33+00:00
            +Running ./dl-model-tinyllama-benchmark
            +Run on (24 X 5100 MHz CPU s)
            +CPU Caches:
            +  L1 Data 48 KiB (x12)
            +  L1 Instruction 32 KiB (x12)
            +  L2 Unified 1280 KiB (x12)
            +  L3 Unified 30720 KiB (x1)
            +Load Average: 1.70, 1.92, 1.54
            +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            +----------------------------------------------------------------------------
            +Benchmark                                  Time             CPU   Iterations
            +----------------------------------------------------------------------------
            +DL_MODEL_TINYLLAMA/scalar             139185 ms       139179 ms            1
            +DL_MODEL_TINYLLAMA/matmul_opt          10038 ms        10038 ms            1
            +DL_MODEL_TINYLLAMA/matmul_opt_omp       7836 ms         7201 ms            1
            +---------- Verification ----------
            +matmul_opt PASS
            +matmul_opt_omp PASS
            +
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-model-whisper-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-model-whisper-benchmark.html new file mode 100644 index 00000000..27cd97e0 --- /dev/null +++ b/site/benchmarks/2025-07-27/deeplearning/dl-model-whisper-benchmark.html @@ -0,0 +1,38 @@ + + + +

            deeplearning/dl-model-whisper-benchmark.json

            2025-07-27 17:05:43 UTC

            +

            dl-model-whisper-benchmark.json

            + + +
            NameTime (ms)CPU (ms)Iterations
            DL_MODEL_Whisper/Auto_Vectorization8e+048e+041
            DL_MODEL_Whisper/Buddy_Vectorization3.67e+043.67e+041
            +
            Console output +
            2025-07-27T14:22:54+00:00
            +Running ./dl-model-whisper-benchmark
            +Run on (24 X 5100 MHz CPU s)
            +CPU Caches:
            +  L1 Data 48 KiB (x12)
            +  L1 Instruction 32 KiB (x12)
            +  L2 Unified 1280 KiB (x12)
            +  L3 Unified 30720 KiB (x1)
            +Load Average: 1.45, 1.40, 1.40
            +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            +-------------------------------------------------------------------------------
            +Benchmark                                     Time             CPU   Iterations
            +-------------------------------------------------------------------------------
            +DL_MODEL_Whisper/Auto_Vectorization       79983 ms        79980 ms            1
            +DL_MODEL_Whisper/Buddy_Vectorization      36713 ms        36700 ms            1
            +-----------------------------------------------------------
            +Correctness Verification for Output1: PASS
            +Correctness Verification for Output2: FAIL
            +-----------------------------------------------------------
            +
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithaddf-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithaddf-benchmark.html new file mode 100644 index 00000000..bd191a74 --- /dev/null +++ b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithaddf-benchmark.html @@ -0,0 +1,38 @@ + + + +

            deeplearning/dl-op-linalg-arithaddf-benchmark.json

            2025-07-27 17:05:43 UTC

            +

            dl-op-linalg-arithaddf-benchmark.json

            + + +
            NameTime (ms)CPU (ms)Iterations
            BM_ADDF_SCALAR0.02950.029523,451
            BM_ADDF_AutoVectorization0.0040.004174,931
            +
            Console output +
            2025-07-27T14:27:23+00:00
            +Running ./dl-op-linalg-arithaddf-benchmark
            +Run on (24 X 5100 MHz CPU s)
            +CPU Caches:
            +  L1 Data 48 KiB (x12)
            +  L1 Instruction 32 KiB (x12)
            +  L2 Unified 1280 KiB (x12)
            +  L3 Unified 30720 KiB (x1)
            +Load Average: 1.07, 1.18, 1.30
            +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            +--------------------------------------------------------------------
            +Benchmark                          Time             CPU   Iterations
            +--------------------------------------------------------------------
            +BM_ADDF_SCALAR                 0.030 ms        0.030 ms        23451
            +BM_ADDF_AutoVectorization      0.004 ms        0.004 ms       174931
            +-----------------------------------------------------------
            +Correctness Verification:
            +Transform case: PASS
            +-----------------------------------------------------------
            +
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithdivf-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithdivf-benchmark.html new file mode 100644 index 00000000..47bbe164 --- /dev/null +++ b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithdivf-benchmark.html @@ -0,0 +1,38 @@ + + + +

            deeplearning/dl-op-linalg-arithdivf-benchmark.json

            2025-07-27 17:05:43 UTC

            +

            dl-op-linalg-arithdivf-benchmark.json

            + + +
            NameTime (ms)CPU (ms)Iterations
            BM_DIVF_SCALAR0.02980.029823,358
            BM_DIVF_AutoVectorization0.009490.0094967,517
            +
            Console output +
            2025-07-27T14:27:25+00:00
            +Running ./dl-op-linalg-arithdivf-benchmark
            +Run on (24 X 5100 MHz CPU s)
            +CPU Caches:
            +  L1 Data 48 KiB (x12)
            +  L1 Instruction 32 KiB (x12)
            +  L2 Unified 1280 KiB (x12)
            +  L3 Unified 30720 KiB (x1)
            +Load Average: 1.07, 1.18, 1.30
            +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            +--------------------------------------------------------------------
            +Benchmark                          Time             CPU   Iterations
            +--------------------------------------------------------------------
            +BM_DIVF_SCALAR                 0.030 ms        0.030 ms        23358
            +BM_DIVF_AutoVectorization      0.009 ms        0.009 ms        67517
            +-----------------------------------------------------------
            +Correctness Verification:
            +Transform case: PASS
            +-----------------------------------------------------------
            +
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithmulf-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithmulf-benchmark.html new file mode 100644 index 00000000..d36d41d0 --- /dev/null +++ b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithmulf-benchmark.html @@ -0,0 +1,38 @@ + + + +

            deeplearning/dl-op-linalg-arithmulf-benchmark.json

            2025-07-27 17:05:43 UTC

            +

            dl-op-linalg-arithmulf-benchmark.json

            + + +
            NameTime (ms)CPU (ms)Iterations
            BM_MULF_SCALAR0.02980.029823,441
            BM_MULF_AutoVectorization0.0040.004175,263
            +
            Console output +
            2025-07-27T14:27:27+00:00
            +Running ./dl-op-linalg-arithmulf-benchmark
            +Run on (24 X 5100 MHz CPU s)
            +CPU Caches:
            +  L1 Data 48 KiB (x12)
            +  L1 Instruction 32 KiB (x12)
            +  L2 Unified 1280 KiB (x12)
            +  L3 Unified 30720 KiB (x1)
            +Load Average: 1.07, 1.18, 1.30
            +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            +--------------------------------------------------------------------
            +Benchmark                          Time             CPU   Iterations
            +--------------------------------------------------------------------
            +BM_MULF_SCALAR                 0.030 ms        0.030 ms        23441
            +BM_MULF_AutoVectorization      0.004 ms        0.004 ms       175263
            +-----------------------------------------------------------
            +Correctness Verification:
            +Transform case: PASS
            +-----------------------------------------------------------
            +
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithnegf-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithnegf-benchmark.html new file mode 100644 index 00000000..481912bc --- /dev/null +++ b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithnegf-benchmark.html @@ -0,0 +1,38 @@ + + + +

            deeplearning/dl-op-linalg-arithnegf-benchmark.json

            2025-07-27 17:05:43 UTC

            +

            dl-op-linalg-arithnegf-benchmark.json

            + + +
            NameTime (ms)CPU (ms)Iterations
            BM_NEGF_SCALAR0.02250.022530,969
            BM_NEGF_AutoVectorization0.002460.00246277,205
            +
            Console output +
            2025-07-27T14:27:29+00:00
            +Running ./dl-op-linalg-arithnegf-benchmark
            +Run on (24 X 5100 MHz CPU s)
            +CPU Caches:
            +  L1 Data 48 KiB (x12)
            +  L1 Instruction 32 KiB (x12)
            +  L2 Unified 1280 KiB (x12)
            +  L3 Unified 30720 KiB (x1)
            +Load Average: 1.07, 1.18, 1.30
            +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            +--------------------------------------------------------------------
            +Benchmark                          Time             CPU   Iterations
            +--------------------------------------------------------------------
            +BM_NEGF_SCALAR                 0.023 ms        0.023 ms        30969
            +BM_NEGF_AutoVectorization      0.002 ms        0.002 ms       277205
            +-----------------------------------------------------------
            +Correctness Verification:
            +Transform case: PASS
            +-----------------------------------------------------------
            +
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithsubf-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithsubf-benchmark.html new file mode 100644 index 00000000..ef9a2d7e --- /dev/null +++ b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithsubf-benchmark.html @@ -0,0 +1,38 @@ + + + +

            deeplearning/dl-op-linalg-arithsubf-benchmark.json

            2025-07-27 17:05:43 UTC

            +

            dl-op-linalg-arithsubf-benchmark.json

            + + +
            NameTime (ms)CPU (ms)Iterations
            BM_SUBF_SCALAR0.02940.029423,509
            BM_SUBF_AutoVectorization0.003990.00399175,223
            +
            Console output +
            2025-07-27T14:27:31+00:00
            +Running ./dl-op-linalg-arithsubf-benchmark
            +Run on (24 X 5100 MHz CPU s)
            +CPU Caches:
            +  L1 Data 48 KiB (x12)
            +  L1 Instruction 32 KiB (x12)
            +  L2 Unified 1280 KiB (x12)
            +  L3 Unified 30720 KiB (x1)
            +Load Average: 1.07, 1.18, 1.30
            +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            +--------------------------------------------------------------------
            +Benchmark                          Time             CPU   Iterations
            +--------------------------------------------------------------------
            +BM_SUBF_SCALAR                 0.029 ms        0.029 ms        23509
            +BM_SUBF_AutoVectorization      0.004 ms        0.004 ms       175223
            +-----------------------------------------------------------
            +Correctness Verification:
            +Transform case: PASS
            +-----------------------------------------------------------
            +
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-batch-matmul-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-batch-matmul-benchmark.html new file mode 100644 index 00000000..24cb1ff1 --- /dev/null +++ b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-batch-matmul-benchmark.html @@ -0,0 +1,49 @@ + + + +

            deeplearning/dl-op-linalg-batch-matmul-benchmark.json

            2025-07-27 17:05:43 UTC

            +

            dl-op-linalg-batch-matmul-benchmark.json

            + + + + + + + +
            NameTime (ms)CPU (ms)Iterations
            DL_OPS_BATCH_MATMUL/Scalar/iterations:13.54e+033.54e+031
            DL_OPS_BATCH_MATMUL/AutoVectorization/iterations:19769761
            DL_OPS_BATCH_MATMUL/Vectorization/iterations:11951951
            DL_OPS_BATCH_MATMUL/Tile/iterations:11091091
            DL_OPS_BATCH_MATMUL/SCF/iterations:11181181
            DL_OPS_BATCH_MATMUL/BROADCAST/iterations:13563561
            DL_OPS_BATCH_MATMUL/BROADCAST_OMP/iterations:16232.11
            +
            Console output +
            2025-07-27T14:27:13+00:00
            +Running ./dl-op-linalg-batch-matmul-benchmark
            +Run on (24 X 5100 MHz CPU s)
            +CPU Caches:
            +  L1 Data 48 KiB (x12)
            +  L1 Instruction 32 KiB (x12)
            +  L2 Unified 1280 KiB (x12)
            +  L3 Unified 30720 KiB (x1)
            +Load Average: 1.08, 1.19, 1.30
            +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            +---------------------------------------------------------------------------------------------
            +Benchmark                                                   Time             CPU   Iterations
            +---------------------------------------------------------------------------------------------
            +DL_OPS_BATCH_MATMUL/Scalar/iterations:1                  3536 ms         3536 ms            1
            +DL_OPS_BATCH_MATMUL/AutoVectorization/iterations:1        976 ms          976 ms            1
            +DL_OPS_BATCH_MATMUL/Vectorization/iterations:1            195 ms          195 ms            1
            +DL_OPS_BATCH_MATMUL/Tile/iterations:1                     109 ms          109 ms            1
            +DL_OPS_BATCH_MATMUL/SCF/iterations:1                      118 ms          118 ms            1
            +DL_OPS_BATCH_MATMUL/BROADCAST/iterations:1                356 ms          356 ms            1
            +DL_OPS_BATCH_MATMUL/BROADCAST_OMP/iterations:1           62.0 ms         32.1 ms            1
            +---------- Verification ----------
            +Tile PASS
            +SCF PASS
            +BROADCAST PASS
            +BROADCAST_OMP PASS
            +
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html new file mode 100644 index 00000000..07ef332e --- /dev/null +++ b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html @@ -0,0 +1,38 @@ + + + +

            deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.json

            2025-07-27 17:05:43 UTC

            +

            dl-op-linalg-conv2d-nchw-fchw-benchmark.json

            + + +
            NameTime (ms)CPU (ms)Iterations
            BM_Conv2DNchwFchw_SCALAR2832832
            BM_Conv2DNchwFchw_Im2col6.86.8101
            +
            Console output +
            2025-07-27T14:27:06+00:00
            +Running ./dl-op-linalg-conv2d-nchw-fchw-benchmark
            +Run on (24 X 5100 MHz CPU s)
            +CPU Caches:
            +  L1 Data 48 KiB (x12)
            +  L1 Instruction 32 KiB (x12)
            +  L2 Unified 1280 KiB (x12)
            +  L3 Unified 30720 KiB (x1)
            +Load Average: 1.10, 1.20, 1.31
            +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            +-------------------------------------------------------------------
            +Benchmark                         Time             CPU   Iterations
            +-------------------------------------------------------------------
            +BM_Conv2DNchwFchw_SCALAR        283 ms          283 ms            2
            +BM_Conv2DNchwFchw_Im2col       6.80 ms         6.80 ms          101
            +-----------------------------------------------------------
            +Correctness Verification:
            +Transform case: PASS
            +-----------------------------------------------------------
            +
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html new file mode 100644 index 00000000..2b39ddd0 --- /dev/null +++ b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html @@ -0,0 +1,42 @@ + + + +

            deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json

            2025-07-27 17:05:43 UTC

            +

            dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json

            + + + + +
            NameTime (ms)CPU (ms)Iterations
            DL_OPS_CONV_2D_NHWC_FHWC/scalar/iterations:572.372.35
            DL_OPS_CONV_2D_NHWC_FHWC/auto_vectorization/iterations:59.359.355
            DL_OPS_CONV_2D_NHWC_FHWC/vectorization/iterations:51.821.825
            DL_OPS_CONV_2D_NHWC_FHWC/vec_tile/iterations:51.821.825
            +
            Console output +
            2025-07-27T14:27:10+00:00
            +Running ./dl-op-linalg-conv2d-nhwc-fhwc-benchmark
            +Run on (24 X 5100 MHz CPU s)
            +CPU Caches:
            +  L1 Data 48 KiB (x12)
            +  L1 Instruction 32 KiB (x12)
            +  L2 Unified 1280 KiB (x12)
            +  L3 Unified 30720 KiB (x1)
            +Load Average: 1.09, 1.19, 1.30
            +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            +---------------------------------------------------------------------------------------------------
            +Benchmark                                                         Time             CPU   Iterations
            +---------------------------------------------------------------------------------------------------
            +DL_OPS_CONV_2D_NHWC_FHWC/scalar/iterations:5                   72.3 ms         72.3 ms            5
            +DL_OPS_CONV_2D_NHWC_FHWC/auto_vectorization/iterations:5       9.35 ms         9.35 ms            5
            +DL_OPS_CONV_2D_NHWC_FHWC/vectorization/iterations:5            1.82 ms         1.82 ms            5
            +DL_OPS_CONV_2D_NHWC_FHWC/vec_tile/iterations:5                 1.82 ms         1.82 ms            5
            +---------- Verification ----------
            +auto_vectorization PASS
            +vectorization PASS
            +vec_tile PASS
            +
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html new file mode 100644 index 00000000..e986e4b4 --- /dev/null +++ b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html @@ -0,0 +1,38 @@ + + + +

            deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json

            2025-07-27 17:05:43 UTC

            +

            dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json

            + + +
            NameTime (ms)CPU (ms)Iterations
            BM_CONV_2D_NHWC_HWCF_SCALAR32.332.322
            BM_CONV_2D_NHWC_HWCF_AutoVectorization6.136.13113
            +
            Console output +
            2025-07-27T14:27:08+00:00
            +Running ./dl-op-linalg-conv2d-nhwc-hwcf-benchmark
            +Run on (24 X 5100 MHz CPU s)
            +CPU Caches:
            +  L1 Data 48 KiB (x12)
            +  L1 Instruction 32 KiB (x12)
            +  L2 Unified 1280 KiB (x12)
            +  L3 Unified 30720 KiB (x1)
            +Load Average: 1.09, 1.19, 1.30
            +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            +---------------------------------------------------------------------------------
            +Benchmark                                       Time             CPU   Iterations
            +---------------------------------------------------------------------------------
            +BM_CONV_2D_NHWC_HWCF_SCALAR                  32.3 ms         32.3 ms           22
            +BM_CONV_2D_NHWC_HWCF_AutoVectorization       6.13 ms         6.13 ms          113
            +-----------------------------------------------------------
            +Correctness Verification:
            +Transform case: PASS
            +-----------------------------------------------------------
            +
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html new file mode 100644 index 00000000..915ff12a --- /dev/null +++ b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html @@ -0,0 +1,39 @@ + + + +

            deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json

            2025-07-27 17:05:43 UTC

            +

            dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json

            + + + +
            NameTime (ms)CPU (ms)Iterations
            DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/scalar/iterations:54.254.255
            DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/auto_vectorization/iterations:51.711.715
            DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/vectorization/iterations:50.1250.1255
            +
            Console output +
            2025-07-27T14:27:11+00:00
            +Running ./dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark
            +Run on (24 X 5100 MHz CPU s)
            +CPU Caches:
            +  L1 Data 48 KiB (x12)
            +  L1 Instruction 32 KiB (x12)
            +  L2 Unified 1280 KiB (x12)
            +  L3 Unified 30720 KiB (x1)
            +Load Average: 1.09, 1.19, 1.30
            +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            +------------------------------------------------------------------------------------------------------------
            +Benchmark                                                                  Time             CPU   Iterations
            +------------------------------------------------------------------------------------------------------------
            +DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/scalar/iterations:5                   4.25 ms         4.25 ms            5
            +DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/auto_vectorization/iterations:5       1.71 ms         1.71 ms            5
            +DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/vectorization/iterations:5           0.125 ms        0.125 ms            5
            +---------- Verification ----------
            +auto_vectorization PASS
            +vectorization PASS
            +
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-mathexp-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-mathexp-benchmark.html new file mode 100644 index 00000000..471b0e0a --- /dev/null +++ b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-mathexp-benchmark.html @@ -0,0 +1,38 @@ + + + +

            deeplearning/dl-op-linalg-mathexp-benchmark.json

            2025-07-27 17:05:43 UTC

            +

            dl-op-linalg-mathexp-benchmark.json

            + + +
            NameTime (ms)CPU (ms)Iterations
            BM_EXP_SCALAR0.04560.045615,225
            BM_EXP_AutoVectorization0.03160.031622,248
            +
            Console output +
            2025-07-27T14:27:37+00:00
            +Running ./dl-op-linalg-mathexp-benchmark
            +Run on (24 X 5100 MHz CPU s)
            +CPU Caches:
            +  L1 Data 48 KiB (x12)
            +  L1 Instruction 32 KiB (x12)
            +  L2 Unified 1280 KiB (x12)
            +  L3 Unified 30720 KiB (x1)
            +Load Average: 1.06, 1.18, 1.29
            +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            +-------------------------------------------------------------------
            +Benchmark                         Time             CPU   Iterations
            +-------------------------------------------------------------------
            +BM_EXP_SCALAR                 0.046 ms        0.046 ms        15225
            +BM_EXP_AutoVectorization      0.032 ms        0.032 ms        22248
            +-----------------------------------------------------------
            +Correctness Verification:
            +Transform case: PASS
            +-----------------------------------------------------------
            +
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-mathfpow-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-mathfpow-benchmark.html new file mode 100644 index 00000000..f84f32bf --- /dev/null +++ b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-mathfpow-benchmark.html @@ -0,0 +1,38 @@ + + + +

            deeplearning/dl-op-linalg-mathfpow-benchmark.json

            2025-07-27 17:05:43 UTC

            +

            dl-op-linalg-mathfpow-benchmark.json

            + + +
            NameTime (ms)CPU (ms)Iterations
            BM_FPOW_SCALAR0.08410.08418,255
            BM_FPOW_AutoVectorization0.05690.056912,305
            +
            Console output +
            2025-07-27T14:27:33+00:00
            +Running ./dl-op-linalg-mathfpow-benchmark
            +Run on (24 X 5100 MHz CPU s)
            +CPU Caches:
            +  L1 Data 48 KiB (x12)
            +  L1 Instruction 32 KiB (x12)
            +  L2 Unified 1280 KiB (x12)
            +  L3 Unified 30720 KiB (x1)
            +Load Average: 1.06, 1.18, 1.29
            +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            +--------------------------------------------------------------------
            +Benchmark                          Time             CPU   Iterations
            +--------------------------------------------------------------------
            +BM_FPOW_SCALAR                 0.084 ms        0.084 ms         8255
            +BM_FPOW_AutoVectorization      0.057 ms        0.057 ms        12305
            +-----------------------------------------------------------
            +Correctness Verification:
            +Transform case: PASS
            +-----------------------------------------------------------
            +
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html new file mode 100644 index 00000000..ed460f7b --- /dev/null +++ b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html @@ -0,0 +1,38 @@ + + + +

            deeplearning/dl-op-linalg-mathrsqrt-benchmark.json

            2025-07-27 17:05:43 UTC

            +

            dl-op-linalg-mathrsqrt-benchmark.json

            + + +
            NameTime (ms)CPU (ms)Iterations
            BM_RSQRT_SCALAR0.07280.07289,537
            BM_RSQRT_AutoVectorization0.004350.00435160,927
            +
            Console output +
            2025-07-27T14:27:35+00:00
            +Running ./dl-op-linalg-mathrsqrt-benchmark
            +Run on (24 X 5100 MHz CPU s)
            +CPU Caches:
            +  L1 Data 48 KiB (x12)
            +  L1 Instruction 32 KiB (x12)
            +  L2 Unified 1280 KiB (x12)
            +  L3 Unified 30720 KiB (x1)
            +Load Average: 1.06, 1.18, 1.29
            +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            +---------------------------------------------------------------------
            +Benchmark                           Time             CPU   Iterations
            +---------------------------------------------------------------------
            +BM_RSQRT_SCALAR                 0.073 ms        0.073 ms         9537
            +BM_RSQRT_AutoVectorization      0.004 ms        0.004 ms       160927
            +-----------------------------------------------------------
            +Correctness Verification:
            +Transform case: PASS
            +-----------------------------------------------------------
            +
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-matmul-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-matmul-benchmark.html new file mode 100644 index 00000000..edf19084 --- /dev/null +++ b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-matmul-benchmark.html @@ -0,0 +1,44 @@ + + + +

            deeplearning/dl-op-linalg-matmul-benchmark.json

            2025-07-27 17:05:43 UTC

            +

            dl-op-linalg-matmul-benchmark.json

            + + + + + +
            NameTime (ms)CPU (ms)Iterations
            DL_OPS_MATMUL/scalar_O0/iterations:13.93e+033.93e+031
            DL_OPS_MATMUL/scalar_O3/iterations:13.21e+033.21e+031
            DL_OPS_MATMUL/tile/iterations:11171171
            DL_OPS_MATMUL/vec/iterations:159.959.91
            DL_OPS_MATMUL/vec_omp/iterations:1229.111
            +
            Console output +
            2025-07-27T14:26:55+00:00
            +Running ./dl-op-linalg-matmul-benchmark
            +Run on (24 X 5100 MHz CPU s)
            +CPU Caches:
            +  L1 Data 48 KiB (x12)
            +  L1 Instruction 32 KiB (x12)
            +  L2 Unified 1280 KiB (x12)
            +  L3 Unified 30720 KiB (x1)
            +Load Average: 1.03, 1.19, 1.30
            +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            +-------------------------------------------------------------------------------
            +Benchmark                                     Time             CPU   Iterations
            +-------------------------------------------------------------------------------
            +DL_OPS_MATMUL/scalar_O0/iterations:1       3933 ms         3933 ms            1
            +DL_OPS_MATMUL/scalar_O3/iterations:1       3212 ms         3212 ms            1
            +DL_OPS_MATMUL/tile/iterations:1             117 ms          117 ms            1
            +DL_OPS_MATMUL/vec/iterations:1             59.9 ms         59.9 ms            1
            +DL_OPS_MATMUL/vec_omp/iterations:1         22.0 ms         9.11 ms            1
            +---------- Verification ----------
            +tile PASS
            +vec PASS
            +vec_omp PASS
            +
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html new file mode 100644 index 00000000..4c682618 --- /dev/null +++ b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html @@ -0,0 +1,38 @@ + + + +

            deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.json

            2025-07-27 17:05:43 UTC

            +

            dl-op-linalg-pooling-nhwc-sum-benchmark.json

            + + +
            NameTime (ms)CPU (ms)Iterations
            BM_POOLING_NHWC_SUM_SCALAR0.2330.2333,002
            BM_POOLING_NHWC_SUM_AutoVectorization0.04140.041416,950
            +
            Console output +
            2025-07-27T14:27:11+00:00
            +Running ./dl-op-linalg-pooling-nhwc-sum-benchmark
            +Run on (24 X 5100 MHz CPU s)
            +CPU Caches:
            +  L1 Data 48 KiB (x12)
            +  L1 Instruction 32 KiB (x12)
            +  L2 Unified 1280 KiB (x12)
            +  L3 Unified 30720 KiB (x1)
            +Load Average: 1.09, 1.19, 1.30
            +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            +--------------------------------------------------------------------------------
            +Benchmark                                      Time             CPU   Iterations
            +--------------------------------------------------------------------------------
            +BM_POOLING_NHWC_SUM_SCALAR                 0.233 ms        0.233 ms         3002
            +BM_POOLING_NHWC_SUM_AutoVectorization      0.041 ms        0.041 ms        16950
            +-----------------------------------------------------------
            +Correctness Verification:
            +Transform case: PASS
            +-----------------------------------------------------------
            +
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-reduceaddf-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-reduceaddf-benchmark.html new file mode 100644 index 00000000..fc53559a --- /dev/null +++ b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-reduceaddf-benchmark.html @@ -0,0 +1,26 @@ + + + +

            deeplearning/dl-op-linalg-reduceaddf-benchmark.json

            2025-07-27 17:05:43 UTC

            +
            ⚠ FAILED: JSON parse error: Expecting value
            +
            Console output +
            2025-07-27T14:27:39+00:00
            +Running ./dl-op-linalg-reduceaddf-benchmark
            +Run on (24 X 5100 MHz CPU s)
            +CPU Caches:
            +  L1 Data 48 KiB (x12)
            +  L1 Instruction 32 KiB (x12)
            +  L2 Unified 1280 KiB (x12)
            +  L3 Unified 30720 KiB (x1)
            +Load Average: 1.06, 1.17, 1.29
            +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            +
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-reducemaxf-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-reducemaxf-benchmark.html new file mode 100644 index 00000000..dd5a2659 --- /dev/null +++ b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-reducemaxf-benchmark.html @@ -0,0 +1,26 @@ + + + +

            deeplearning/dl-op-linalg-reducemaxf-benchmark.json

            2025-07-27 17:05:43 UTC

            +
            ⚠ FAILED: JSON parse error: Expecting value
            +
            Console output +
            2025-07-27T14:27:39+00:00
            +Running ./dl-op-linalg-reducemaxf-benchmark
            +Run on (24 X 5100 MHz CPU s)
            +CPU Caches:
            +  L1 Data 48 KiB (x12)
            +  L1 Instruction 32 KiB (x12)
            +  L2 Unified 1280 KiB (x12)
            +  L3 Unified 30720 KiB (x1)
            +Load Average: 1.06, 1.17, 1.29
            +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            +
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html new file mode 100644 index 00000000..07b0d427 --- /dev/null +++ b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html @@ -0,0 +1,38 @@ + + + +

            deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.json

            2025-07-27 17:05:43 UTC

            +

            dl-op-linalg-softmax-exp-sum-div-benchmark.json

            + + +
            NameTime (ms)CPU (ms)Iterations
            BM_SOFTMAXEXPSUMDIV_SCALAR0.005660.00566121,646
            BM_SOFTMAXEXPSUMDIV_AutoVectorization0.003850.00385181,826
            +
            Console output +
            2025-07-27T14:27:39+00:00
            +Running ./dl-op-linalg-softmax-exp-sum-div-benchmark
            +Run on (24 X 5100 MHz CPU s)
            +CPU Caches:
            +  L1 Data 48 KiB (x12)
            +  L1 Instruction 32 KiB (x12)
            +  L2 Unified 1280 KiB (x12)
            +  L3 Unified 30720 KiB (x1)
            +Load Average: 1.06, 1.17, 1.29
            +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            +--------------------------------------------------------------------------------
            +Benchmark                                      Time             CPU   Iterations
            +--------------------------------------------------------------------------------
            +BM_SOFTMAXEXPSUMDIV_SCALAR                 0.006 ms        0.006 ms       121646
            +BM_SOFTMAXEXPSUMDIV_AutoVectorization      0.004 ms        0.004 ms       181826
            +-----------------------------------------------------------
            +Correctness Verification:
            +Transform case: PASS
            +-----------------------------------------------------------
            +
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-op-matmul-transpose-b-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-op-matmul-transpose-b-benchmark.html new file mode 100644 index 00000000..07dc878a --- /dev/null +++ b/site/benchmarks/2025-07-27/deeplearning/dl-op-matmul-transpose-b-benchmark.html @@ -0,0 +1,42 @@ + + + +

            deeplearning/dl-op-matmul-transpose-b-benchmark.json

            2025-07-27 17:05:43 UTC

            +

            dl-op-matmul-transpose-b-benchmark.json

            + + + + +
            NameTime (ms)CPU (ms)Iterations
            DL_OPS_MATMUL_TRANSPOSE_B/scalar_O0/iterations:51.05e+031.05e+035
            DL_OPS_MATMUL_TRANSPOSE_B/scalar_O3/iterations:52782785
            DL_OPS_MATMUL_TRANSPOSE_B/scalar_O3_omp/iterations:532.322.45
            DL_OPS_MATMUL_TRANSPOSE_B/vec/iterations:585.585.55
            +
            Console output +
            2025-07-27T14:27:42+00:00
            +Running ./dl-op-matmul-transpose-b-benchmark
            +Run on (24 X 5100 MHz CPU s)
            +CPU Caches:
            +  L1 Data 48 KiB (x12)
            +  L1 Instruction 32 KiB (x12)
            +  L2 Unified 1280 KiB (x12)
            +  L3 Unified 30720 KiB (x1)
            +Load Average: 1.06, 1.17, 1.29
            +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            +-----------------------------------------------------------------------------------------------
            +Benchmark                                                     Time             CPU   Iterations
            +-----------------------------------------------------------------------------------------------
            +DL_OPS_MATMUL_TRANSPOSE_B/scalar_O0/iterations:5           1051 ms         1050 ms            5
            +DL_OPS_MATMUL_TRANSPOSE_B/scalar_O3/iterations:5            278 ms          278 ms            5
            +DL_OPS_MATMUL_TRANSPOSE_B/scalar_O3_omp/iterations:5       32.3 ms         22.4 ms            5
            +DL_OPS_MATMUL_TRANSPOSE_B/vec/iterations:5                 85.5 ms         85.5 ms            5
            +---------- Verification ----------
            +scalar_O3 PASS
            +scalar_O3_omp PASS
            +vec PASS
            +
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-op-tosa-transpose-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-op-tosa-transpose-benchmark.html new file mode 100644 index 00000000..79e1e3c3 --- /dev/null +++ b/site/benchmarks/2025-07-27/deeplearning/dl-op-tosa-transpose-benchmark.html @@ -0,0 +1,36 @@ + + + +

            deeplearning/dl-op-tosa-transpose-benchmark.json

            2025-07-27 17:05:43 UTC

            +

            dl-op-tosa-transpose-benchmark.json

            + + +
            NameTime (ms)CPU (ms)Iterations
            DL_OPS_TRANSPOSE_2D/scalar_O0/iterations:526.421.45
            DL_OPS_TRANSPOSE_2D/scalar_O3/iterations:518.913.45
            +
            Console output +
            2025-07-27T14:27:41+00:00
            +Running ./dl-op-tosa-transpose-benchmark
            +Run on (24 X 5100 MHz CPU s)
            +CPU Caches:
            +  L1 Data 48 KiB (x12)
            +  L1 Instruction 32 KiB (x12)
            +  L2 Unified 1280 KiB (x12)
            +  L3 Unified 30720 KiB (x1)
            +Load Average: 1.06, 1.17, 1.29
            +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            +-------------------------------------------------------------------------------------
            +Benchmark                                           Time             CPU   Iterations
            +-------------------------------------------------------------------------------------
            +DL_OPS_TRANSPOSE_2D/scalar_O0/iterations:5       26.4 ms         21.4 ms            5
            +DL_OPS_TRANSPOSE_2D/scalar_O3/iterations:5       18.9 ms         13.4 ms            5
            +---------- Verification ----------
            +scalar_O3 PASS
            +
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html new file mode 100644 index 00000000..4b1e7a32 --- /dev/null +++ b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -0,0 +1,95 @@ + + + +

            imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:05:43 UTC

            +

            AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            + + + + + + + + + + + + + + + + + + + + + + +
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.884.88143
            MLIR_Conv2D/17.197.1997
            Buddy_Conv2D/10.4180.4181,675
            Buddy_Corr2D_Constant_Padding/11.061.06666
            OpenCV_Filter2D_Constant_Padding/11.861.86376
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,817
            Buddy_Resize2D_Bilinear_Interpolation/10.260.262,689
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006650.00665105,253
            OpenCV_Resize2D_Bilinear_Interpolation/10.0140.01449,956
            Buddy_Erosion2D_Constant_Padding/10.2150.2153,274
            Buddy_Dilation2D_Constant_Padding/10.2140.2143,255
            Buddy_Opening2D_Constant_Padding/10.3140.3142,261
            Buddy_Closing2D_Constant_Padding/10.3180.3182,259
            Buddy_TopHat2D_Constant_Padding/10.810.81835
            Buddy_BottomHat2D_Constant_Padding/10.7870.787848
            OpenCV_Erode2D_Constant_Padding/10.1370.1375,119
            OpenCV_Opening2D_Constant_Padding/10.2260.2263,092
            OpenCV_Closing2D_Constant_Padding/10.2270.2273,082
            OpenCV_TopHat2D_Constant_Padding/10.260.262,689
            OpenCV_BottomHat2D_Constant_Padding/10.2590.2592,705
            OpenCV_MorphGrad2D_Constant_Padding/10.2520.2522,779
            OpenCV_Dilate2D_Constant_Padding/10.1370.1375,113
            +
            Console output +
            2025-06-01T10:09:28+00:00
            +Running ./bin/image-processing-benchmark
            +Run on (24 X 5100 MHz CPU s)
            +CPU Caches:
            +  L1 Data 48 KiB (x12)
            +  L1 Instruction 32 KiB (x12)
            +  L2 Unified 1280 KiB (x12)
            +  L3 Unified 30720 KiB (x1)
            +Load Average: 1.00, 1.13, 1.85
            +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            +--------------------------------------------------------------------------------------------
            +Benchmark                                                  Time             CPU   Iterations
            +--------------------------------------------------------------------------------------------
            +Eigen_Convolve2D/1                                      4.88 ms         4.88 ms          143
            +MLIR_Conv2D/1                                           7.19 ms         7.19 ms           97
            +Buddy_Conv2D/1                                         0.418 ms        0.418 ms         1675
            +Buddy_Corr2D_Constant_Padding/1                         1.06 ms         1.06 ms          666
            +OpenCV_Filter2D_Constant_Padding/1                      1.86 ms         1.86 ms          376
            +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4817
            +Buddy_Resize2D_Bilinear_Interpolation/1                0.260 ms        0.260 ms         2689
            +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       105253
            +OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49956
            +Buddy_Erosion2D_Constant_Padding/1                     0.215 ms        0.215 ms         3274
            +Buddy_Dilation2D_Constant_Padding/1                    0.214 ms        0.214 ms         3255
            +Buddy_Opening2D_Constant_Padding/1                     0.314 ms        0.314 ms         2261
            +Buddy_Closing2D_Constant_Padding/1                     0.318 ms        0.318 ms         2259
            +Buddy_TopHat2D_Constant_Padding/1                      0.810 ms        0.810 ms          835
            +Buddy_BottomHat2D_Constant_Padding/1                   0.787 ms        0.787 ms          848
            +OpenCV_Erode2D_Constant_Padding/1                      0.137 ms        0.137 ms         5119
            +OpenCV_Opening2D_Constant_Padding/1                    0.226 ms        0.226 ms         3092
            +OpenCV_Closing2D_Constant_Padding/1                    0.227 ms        0.227 ms         3082
            +OpenCV_TopHat2D_Constant_Padding/1                     0.260 ms        0.260 ms         2689
            +OpenCV_BottomHat2D_Constant_Padding/1                  0.259 ms        0.259 ms         2705
            +OpenCV_MorphGrad2D_Constant_Padding/1                  0.252 ms        0.252 ms         2779
            +OpenCV_Dilate2D_Constant_Padding/1                     0.137 ms        0.137 ms         5113
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html new file mode 100644 index 00000000..6b3a47c9 --- /dev/null +++ b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -0,0 +1,95 @@ + + + +

            imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:05:43 UTC

            +

            AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            + + + + + + + + + + + + + + + + + + + + + + +
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.884.88143
            MLIR_Conv2D/17.187.1897
            Buddy_Conv2D/10.4180.4181,675
            Buddy_Corr2D_Constant_Padding/11.061.06662
            OpenCV_Filter2D_Constant_Padding/11.861.86376
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,857
            Buddy_Resize2D_Bilinear_Interpolation/10.260.262,693
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006660.00666105,155
            OpenCV_Resize2D_Bilinear_Interpolation/10.01410.014149,833
            Buddy_Erosion2D_Constant_Padding/10.2150.2153,267
            Buddy_Dilation2D_Constant_Padding/10.2140.2143,262
            Buddy_Opening2D_Constant_Padding/10.3090.3092,259
            Buddy_Closing2D_Constant_Padding/10.3110.3112,232
            Buddy_TopHat2D_Constant_Padding/10.8010.801854
            Buddy_BottomHat2D_Constant_Padding/10.7950.795833
            OpenCV_Erode2D_Constant_Padding/10.1360.1365,118
            OpenCV_Opening2D_Constant_Padding/10.2240.2243,117
            OpenCV_Closing2D_Constant_Padding/10.2260.2263,087
            OpenCV_TopHat2D_Constant_Padding/10.2560.2562,731
            OpenCV_BottomHat2D_Constant_Padding/10.260.262,693
            OpenCV_MorphGrad2D_Constant_Padding/10.2490.2492,799
            OpenCV_Dilate2D_Constant_Padding/10.1390.1395,051
            +
            Console output +
            2025-06-01T10:09:52+00:00
            +Running ./bin/image-processing-benchmark
            +Run on (24 X 5100 MHz CPU s)
            +CPU Caches:
            +  L1 Data 48 KiB (x12)
            +  L1 Instruction 32 KiB (x12)
            +  L2 Unified 1280 KiB (x12)
            +  L3 Unified 30720 KiB (x1)
            +Load Average: 1.00, 1.12, 1.83
            +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            +--------------------------------------------------------------------------------------------
            +Benchmark                                                  Time             CPU   Iterations
            +--------------------------------------------------------------------------------------------
            +Eigen_Convolve2D/1                                      4.88 ms         4.88 ms          143
            +MLIR_Conv2D/1                                           7.18 ms         7.18 ms           97
            +Buddy_Conv2D/1                                         0.418 ms        0.418 ms         1675
            +Buddy_Corr2D_Constant_Padding/1                         1.06 ms         1.06 ms          662
            +OpenCV_Filter2D_Constant_Padding/1                      1.86 ms         1.86 ms          376
            +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4857
            +Buddy_Resize2D_Bilinear_Interpolation/1                0.260 ms        0.260 ms         2693
            +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       105155
            +OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49833
            +Buddy_Erosion2D_Constant_Padding/1                     0.215 ms        0.215 ms         3267
            +Buddy_Dilation2D_Constant_Padding/1                    0.214 ms        0.214 ms         3262
            +Buddy_Opening2D_Constant_Padding/1                     0.309 ms        0.309 ms         2259
            +Buddy_Closing2D_Constant_Padding/1                     0.311 ms        0.311 ms         2232
            +Buddy_TopHat2D_Constant_Padding/1                      0.801 ms        0.801 ms          854
            +Buddy_BottomHat2D_Constant_Padding/1                   0.795 ms        0.795 ms          833
            +OpenCV_Erode2D_Constant_Padding/1                      0.136 ms        0.136 ms         5118
            +OpenCV_Opening2D_Constant_Padding/1                    0.224 ms        0.224 ms         3117
            +OpenCV_Closing2D_Constant_Padding/1                    0.226 ms        0.226 ms         3087
            +OpenCV_TopHat2D_Constant_Padding/1                     0.256 ms        0.256 ms         2731
            +OpenCV_BottomHat2D_Constant_Padding/1                  0.260 ms        0.260 ms         2693
            +OpenCV_MorphGrad2D_Constant_Padding/1                  0.249 ms        0.249 ms         2799
            +OpenCV_Dilate2D_Constant_Padding/1                     0.139 ms        0.139 ms         5051
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html new file mode 100644 index 00000000..5522eb0e --- /dev/null +++ b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -0,0 +1,95 @@ + + + +

            imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:05:43 UTC

            +

            AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            + + + + + + + + + + + + + + + + + + + + + + +
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.511.561
            MLIR_Conv2D/1292925
            Buddy_Conv2D/11.111.11632
            Buddy_Corr2D_Constant_Padding/11.741.74400
            OpenCV_Filter2D_Constant_Padding/12.682.68262
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1420.1424,855
            Buddy_Resize2D_Bilinear_Interpolation/10.2610.2612,692
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006630.00663105,416
            OpenCV_Resize2D_Bilinear_Interpolation/10.0140.01449,870
            Buddy_Erosion2D_Constant_Padding/10.2140.2143,258
            Buddy_Dilation2D_Constant_Padding/10.2140.2143,246
            Buddy_Opening2D_Constant_Padding/10.320.322,260
            Buddy_Closing2D_Constant_Padding/10.310.312,223
            Buddy_TopHat2D_Constant_Padding/10.8060.806827
            Buddy_BottomHat2D_Constant_Padding/10.820.82852
            OpenCV_Erode2D_Constant_Padding/10.1370.1375,096
            OpenCV_Opening2D_Constant_Padding/10.2230.2233,136
            OpenCV_Closing2D_Constant_Padding/10.2270.2273,085
            OpenCV_TopHat2D_Constant_Padding/10.260.262,693
            OpenCV_BottomHat2D_Constant_Padding/10.260.262,686
            OpenCV_MorphGrad2D_Constant_Padding/10.2540.2542,746
            OpenCV_Dilate2D_Constant_Padding/10.1340.1345,208
            +
            Console output +
            2025-06-01T10:10:17+00:00
            +Running ./bin/image-processing-benchmark
            +Run on (24 X 5100 MHz CPU s)
            +CPU Caches:
            +  L1 Data 48 KiB (x12)
            +  L1 Instruction 32 KiB (x12)
            +  L2 Unified 1280 KiB (x12)
            +  L3 Unified 30720 KiB (x1)
            +Load Average: 1.00, 1.11, 1.81
            +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            +--------------------------------------------------------------------------------------------
            +Benchmark                                                  Time             CPU   Iterations
            +--------------------------------------------------------------------------------------------
            +Eigen_Convolve2D/1                                      11.5 ms         11.5 ms           61
            +MLIR_Conv2D/1                                           29.0 ms         29.0 ms           25
            +Buddy_Conv2D/1                                          1.11 ms         1.11 ms          632
            +Buddy_Corr2D_Constant_Padding/1                         1.74 ms         1.74 ms          400
            +OpenCV_Filter2D_Constant_Padding/1                      2.68 ms         2.68 ms          262
            +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.142 ms        0.142 ms         4855
            +Buddy_Resize2D_Bilinear_Interpolation/1                0.261 ms        0.261 ms         2692
            +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       105416
            +OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49870
            +Buddy_Erosion2D_Constant_Padding/1                     0.214 ms        0.214 ms         3258
            +Buddy_Dilation2D_Constant_Padding/1                    0.214 ms        0.214 ms         3246
            +Buddy_Opening2D_Constant_Padding/1                     0.320 ms        0.320 ms         2260
            +Buddy_Closing2D_Constant_Padding/1                     0.310 ms        0.310 ms         2223
            +Buddy_TopHat2D_Constant_Padding/1                      0.806 ms        0.806 ms          827
            +Buddy_BottomHat2D_Constant_Padding/1                   0.820 ms        0.820 ms          852
            +OpenCV_Erode2D_Constant_Padding/1                      0.137 ms        0.137 ms         5096
            +OpenCV_Opening2D_Constant_Padding/1                    0.223 ms        0.223 ms         3136
            +OpenCV_Closing2D_Constant_Padding/1                    0.227 ms        0.227 ms         3085
            +OpenCV_TopHat2D_Constant_Padding/1                     0.260 ms        0.260 ms         2693
            +OpenCV_BottomHat2D_Constant_Padding/1                  0.260 ms        0.260 ms         2686
            +OpenCV_MorphGrad2D_Constant_Padding/1                  0.254 ms        0.254 ms         2746
            +OpenCV_Dilate2D_Constant_Padding/1                     0.134 ms        0.134 ms         5208
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html new file mode 100644 index 00000000..035ae8cb --- /dev/null +++ b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -0,0 +1,95 @@ + + + +

            imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:05:43 UTC

            +

            AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            + + + + + + + + + + + + + + + + + + + + + + +
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.611.661
            MLIR_Conv2D/1292925
            Buddy_Conv2D/11.021.02685
            Buddy_Corr2D_Constant_Padding/11.751.75400
            OpenCV_Filter2D_Constant_Padding/12.682.68261
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1420.1424,858
            Buddy_Resize2D_Bilinear_Interpolation/10.260.262,692
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006650.00665105,372
            OpenCV_Resize2D_Bilinear_Interpolation/10.0140.01449,847
            Buddy_Erosion2D_Constant_Padding/10.2140.2143,249
            Buddy_Dilation2D_Constant_Padding/10.2130.2133,265
            Buddy_Opening2D_Constant_Padding/10.3140.3142,214
            Buddy_Closing2D_Constant_Padding/10.3080.3082,229
            Buddy_TopHat2D_Constant_Padding/10.790.79828
            Buddy_BottomHat2D_Constant_Padding/10.7770.777854
            OpenCV_Erode2D_Constant_Padding/10.1370.1375,075
            OpenCV_Opening2D_Constant_Padding/10.2250.2253,111
            OpenCV_Closing2D_Constant_Padding/10.2290.2293,056
            OpenCV_TopHat2D_Constant_Padding/10.2620.2622,672
            OpenCV_BottomHat2D_Constant_Padding/10.2640.2642,653
            OpenCV_MorphGrad2D_Constant_Padding/10.2540.2542,750
            OpenCV_Dilate2D_Constant_Padding/10.1350.1355,201
            +
            Console output +
            2025-06-01T10:10:41+00:00
            +Running ./bin/image-processing-benchmark
            +Run on (24 X 5100 MHz CPU s)
            +CPU Caches:
            +  L1 Data 48 KiB (x12)
            +  L1 Instruction 32 KiB (x12)
            +  L2 Unified 1280 KiB (x12)
            +  L3 Unified 30720 KiB (x1)
            +Load Average: 1.00, 1.10, 1.79
            +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            +--------------------------------------------------------------------------------------------
            +Benchmark                                                  Time             CPU   Iterations
            +--------------------------------------------------------------------------------------------
            +Eigen_Convolve2D/1                                      11.6 ms         11.6 ms           61
            +MLIR_Conv2D/1                                           29.0 ms         29.0 ms           25
            +Buddy_Conv2D/1                                          1.02 ms         1.02 ms          685
            +Buddy_Corr2D_Constant_Padding/1                         1.75 ms         1.75 ms          400
            +OpenCV_Filter2D_Constant_Padding/1                      2.68 ms         2.68 ms          261
            +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.142 ms        0.142 ms         4858
            +Buddy_Resize2D_Bilinear_Interpolation/1                0.260 ms        0.260 ms         2692
            +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       105372
            +OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49847
            +Buddy_Erosion2D_Constant_Padding/1                     0.214 ms        0.214 ms         3249
            +Buddy_Dilation2D_Constant_Padding/1                    0.213 ms        0.213 ms         3265
            +Buddy_Opening2D_Constant_Padding/1                     0.314 ms        0.314 ms         2214
            +Buddy_Closing2D_Constant_Padding/1                     0.308 ms        0.308 ms         2229
            +Buddy_TopHat2D_Constant_Padding/1                      0.790 ms        0.790 ms          828
            +Buddy_BottomHat2D_Constant_Padding/1                   0.777 ms        0.777 ms          854
            +OpenCV_Erode2D_Constant_Padding/1                      0.137 ms        0.137 ms         5075
            +OpenCV_Opening2D_Constant_Padding/1                    0.225 ms        0.225 ms         3111
            +OpenCV_Closing2D_Constant_Padding/1                    0.229 ms        0.229 ms         3056
            +OpenCV_TopHat2D_Constant_Padding/1                     0.262 ms        0.262 ms         2672
            +OpenCV_BottomHat2D_Constant_Padding/1                  0.264 ms        0.264 ms         2653
            +OpenCV_MorphGrad2D_Constant_Padding/1                  0.254 ms        0.254 ms         2750
            +OpenCV_Dilate2D_Constant_Padding/1                     0.135 ms        0.135 ms         5201
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html new file mode 100644 index 00000000..c5576381 --- /dev/null +++ b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -0,0 +1,95 @@ + + + +

            imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:05:43 UTC

            +

            AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            + + + + + + + + + + + + + + + + + + + + + + +
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.874.87144
            MLIR_Conv2D/17.387.3895
            Buddy_Conv2D/10.3120.3122,241
            Buddy_Corr2D_Constant_Padding/10.8210.821849
            OpenCV_Filter2D_Constant_Padding/11.281.28547
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1460.1464,774
            Buddy_Resize2D_Bilinear_Interpolation/10.2670.2672,627
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.00670.0067103,069
            OpenCV_Resize2D_Bilinear_Interpolation/10.0140.01449,911
            Buddy_Erosion2D_Constant_Padding/10.2140.2143,259
            Buddy_Dilation2D_Constant_Padding/10.2150.2153,230
            Buddy_Opening2D_Constant_Padding/10.3090.3092,255
            Buddy_Closing2D_Constant_Padding/10.3090.3092,273
            Buddy_TopHat2D_Constant_Padding/10.7760.776855
            Buddy_BottomHat2D_Constant_Padding/10.7740.774856
            OpenCV_Erode2D_Constant_Padding/10.1360.1365,148
            OpenCV_Opening2D_Constant_Padding/10.2190.2193,185
            OpenCV_Closing2D_Constant_Padding/10.2230.2233,143
            OpenCV_TopHat2D_Constant_Padding/10.2590.2592,699
            OpenCV_BottomHat2D_Constant_Padding/10.2580.2582,714
            OpenCV_MorphGrad2D_Constant_Padding/10.2510.2512,791
            OpenCV_Dilate2D_Constant_Padding/10.1370.1375,112
            +
            Console output +
            2025-06-01T10:05:28+00:00
            +Running ./bin/image-processing-benchmark
            +Run on (24 X 5100 MHz CPU s)
            +CPU Caches:
            +  L1 Data 48 KiB (x12)
            +  L1 Instruction 32 KiB (x12)
            +  L2 Unified 1280 KiB (x12)
            +  L3 Unified 30720 KiB (x1)
            +Load Average: 1.19, 1.29, 2.11
            +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            +--------------------------------------------------------------------------------------------
            +Benchmark                                                  Time             CPU   Iterations
            +--------------------------------------------------------------------------------------------
            +Eigen_Convolve2D/1                                      4.87 ms         4.87 ms          144
            +MLIR_Conv2D/1                                           7.38 ms         7.38 ms           95
            +Buddy_Conv2D/1                                         0.312 ms        0.312 ms         2241
            +Buddy_Corr2D_Constant_Padding/1                        0.821 ms        0.821 ms          849
            +OpenCV_Filter2D_Constant_Padding/1                      1.28 ms         1.28 ms          547
            +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.146 ms        0.146 ms         4774
            +Buddy_Resize2D_Bilinear_Interpolation/1                0.267 ms        0.267 ms         2627
            +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       103069
            +OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49911
            +Buddy_Erosion2D_Constant_Padding/1                     0.214 ms        0.214 ms         3259
            +Buddy_Dilation2D_Constant_Padding/1                    0.215 ms        0.215 ms         3230
            +Buddy_Opening2D_Constant_Padding/1                     0.309 ms        0.309 ms         2255
            +Buddy_Closing2D_Constant_Padding/1                     0.309 ms        0.309 ms         2273
            +Buddy_TopHat2D_Constant_Padding/1                      0.776 ms        0.776 ms          855
            +Buddy_BottomHat2D_Constant_Padding/1                   0.774 ms        0.774 ms          856
            +OpenCV_Erode2D_Constant_Padding/1                      0.136 ms        0.136 ms         5148
            +OpenCV_Opening2D_Constant_Padding/1                    0.219 ms        0.219 ms         3185
            +OpenCV_Closing2D_Constant_Padding/1                    0.223 ms        0.223 ms         3143
            +OpenCV_TopHat2D_Constant_Padding/1                     0.259 ms        0.259 ms         2699
            +OpenCV_BottomHat2D_Constant_Padding/1                  0.258 ms        0.258 ms         2714
            +OpenCV_MorphGrad2D_Constant_Padding/1                  0.251 ms        0.251 ms         2791
            +OpenCV_Dilate2D_Constant_Padding/1                     0.137 ms        0.137 ms         5112
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html new file mode 100644 index 00000000..7d763bbc --- /dev/null +++ b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -0,0 +1,95 @@ + + + +

            imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:05:43 UTC

            +

            AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            + + + + + + + + + + + + + + + + + + + + + + +
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.94.9144
            MLIR_Conv2D/17.217.2197
            Buddy_Conv2D/10.3110.312,257
            Buddy_Corr2D_Constant_Padding/10.7980.798878
            OpenCV_Filter2D_Constant_Padding/11.251.25560
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,872
            Buddy_Resize2D_Bilinear_Interpolation/10.2620.2622,654
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006660.00666105,278
            OpenCV_Resize2D_Bilinear_Interpolation/10.0140.01449,913
            Buddy_Erosion2D_Constant_Padding/10.2140.2143,221
            Buddy_Dilation2D_Constant_Padding/10.2140.2143,271
            Buddy_Opening2D_Constant_Padding/10.3160.3162,213
            Buddy_Closing2D_Constant_Padding/10.3110.3112,230
            Buddy_TopHat2D_Constant_Padding/10.80.8866
            Buddy_BottomHat2D_Constant_Padding/10.7970.797846
            OpenCV_Erode2D_Constant_Padding/10.1380.1385,058
            OpenCV_Opening2D_Constant_Padding/10.2220.2223,149
            OpenCV_Closing2D_Constant_Padding/10.2210.2213,169
            OpenCV_TopHat2D_Constant_Padding/10.2570.2572,725
            OpenCV_BottomHat2D_Constant_Padding/10.2570.2572,715
            OpenCV_MorphGrad2D_Constant_Padding/10.250.252,798
            OpenCV_Dilate2D_Constant_Padding/10.1370.1375,116
            +
            Console output +
            2025-06-01T10:05:52+00:00
            +Running ./bin/image-processing-benchmark
            +Run on (24 X 5100 MHz CPU s)
            +CPU Caches:
            +  L1 Data 48 KiB (x12)
            +  L1 Instruction 32 KiB (x12)
            +  L2 Unified 1280 KiB (x12)
            +  L3 Unified 30720 KiB (x1)
            +Load Average: 1.20, 1.29, 2.09
            +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            +--------------------------------------------------------------------------------------------
            +Benchmark                                                  Time             CPU   Iterations
            +--------------------------------------------------------------------------------------------
            +Eigen_Convolve2D/1                                      4.90 ms         4.90 ms          144
            +MLIR_Conv2D/1                                           7.21 ms         7.21 ms           97
            +Buddy_Conv2D/1                                         0.311 ms        0.310 ms         2257
            +Buddy_Corr2D_Constant_Padding/1                        0.798 ms        0.798 ms          878
            +OpenCV_Filter2D_Constant_Padding/1                      1.25 ms         1.25 ms          560
            +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4872
            +Buddy_Resize2D_Bilinear_Interpolation/1                0.262 ms        0.262 ms         2654
            +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       105278
            +OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49913
            +Buddy_Erosion2D_Constant_Padding/1                     0.214 ms        0.214 ms         3221
            +Buddy_Dilation2D_Constant_Padding/1                    0.214 ms        0.214 ms         3271
            +Buddy_Opening2D_Constant_Padding/1                     0.316 ms        0.316 ms         2213
            +Buddy_Closing2D_Constant_Padding/1                     0.311 ms        0.311 ms         2230
            +Buddy_TopHat2D_Constant_Padding/1                      0.800 ms        0.800 ms          866
            +Buddy_BottomHat2D_Constant_Padding/1                   0.797 ms        0.797 ms          846
            +OpenCV_Erode2D_Constant_Padding/1                      0.138 ms        0.138 ms         5058
            +OpenCV_Opening2D_Constant_Padding/1                    0.222 ms        0.222 ms         3149
            +OpenCV_Closing2D_Constant_Padding/1                    0.221 ms        0.221 ms         3169
            +OpenCV_TopHat2D_Constant_Padding/1                     0.257 ms        0.257 ms         2725
            +OpenCV_BottomHat2D_Constant_Padding/1                  0.257 ms        0.257 ms         2715
            +OpenCV_MorphGrad2D_Constant_Padding/1                  0.250 ms        0.250 ms         2798
            +OpenCV_Dilate2D_Constant_Padding/1                     0.137 ms        0.137 ms         5116
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html new file mode 100644 index 00000000..e370d7f3 --- /dev/null +++ b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -0,0 +1,95 @@ + + + +

            imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:05:43 UTC

            +

            AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            + + + + + + + + + + + + + + + + + + + + + + +
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.774.77147
            MLIR_Conv2D/17.27.297
            Buddy_Conv2D/10.310.312,252
            Buddy_Corr2D_Constant_Padding/10.8020.802868
            OpenCV_Filter2D_Constant_Padding/11.251.25560
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,854
            Buddy_Resize2D_Bilinear_Interpolation/10.2610.2612,649
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006690.00669105,099
            OpenCV_Resize2D_Bilinear_Interpolation/10.0140.01449,931
            Buddy_Erosion2D_Constant_Padding/10.2140.2143,253
            Buddy_Dilation2D_Constant_Padding/10.2140.2143,245
            Buddy_Opening2D_Constant_Padding/10.3160.3162,229
            Buddy_Closing2D_Constant_Padding/10.3130.3132,256
            Buddy_TopHat2D_Constant_Padding/10.8040.804822
            Buddy_BottomHat2D_Constant_Padding/10.7990.799842
            OpenCV_Erode2D_Constant_Padding/10.1350.1355,153
            OpenCV_Opening2D_Constant_Padding/10.220.223,158
            OpenCV_Closing2D_Constant_Padding/10.2210.2213,163
            OpenCV_TopHat2D_Constant_Padding/10.2570.2572,727
            OpenCV_BottomHat2D_Constant_Padding/10.2550.2552,742
            OpenCV_MorphGrad2D_Constant_Padding/10.2480.2482,826
            OpenCV_Dilate2D_Constant_Padding/10.1360.1365,148
            +
            Console output +
            2025-06-01T10:06:16+00:00
            +Running ./bin/image-processing-benchmark
            +Run on (24 X 5100 MHz CPU s)
            +CPU Caches:
            +  L1 Data 48 KiB (x12)
            +  L1 Instruction 32 KiB (x12)
            +  L2 Unified 1280 KiB (x12)
            +  L3 Unified 30720 KiB (x1)
            +Load Average: 1.13, 1.26, 2.06
            +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            +--------------------------------------------------------------------------------------------
            +Benchmark                                                  Time             CPU   Iterations
            +--------------------------------------------------------------------------------------------
            +Eigen_Convolve2D/1                                      4.77 ms         4.77 ms          147
            +MLIR_Conv2D/1                                           7.20 ms         7.20 ms           97
            +Buddy_Conv2D/1                                         0.310 ms        0.310 ms         2252
            +Buddy_Corr2D_Constant_Padding/1                        0.802 ms        0.802 ms          868
            +OpenCV_Filter2D_Constant_Padding/1                      1.25 ms         1.25 ms          560
            +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4854
            +Buddy_Resize2D_Bilinear_Interpolation/1                0.261 ms        0.261 ms         2649
            +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       105099
            +OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49931
            +Buddy_Erosion2D_Constant_Padding/1                     0.214 ms        0.214 ms         3253
            +Buddy_Dilation2D_Constant_Padding/1                    0.214 ms        0.214 ms         3245
            +Buddy_Opening2D_Constant_Padding/1                     0.316 ms        0.316 ms         2229
            +Buddy_Closing2D_Constant_Padding/1                     0.313 ms        0.313 ms         2256
            +Buddy_TopHat2D_Constant_Padding/1                      0.804 ms        0.804 ms          822
            +Buddy_BottomHat2D_Constant_Padding/1                   0.799 ms        0.799 ms          842
            +OpenCV_Erode2D_Constant_Padding/1                      0.135 ms        0.135 ms         5153
            +OpenCV_Opening2D_Constant_Padding/1                    0.220 ms        0.220 ms         3158
            +OpenCV_Closing2D_Constant_Padding/1                    0.221 ms        0.221 ms         3163
            +OpenCV_TopHat2D_Constant_Padding/1                     0.257 ms        0.257 ms         2727
            +OpenCV_BottomHat2D_Constant_Padding/1                  0.255 ms        0.255 ms         2742
            +OpenCV_MorphGrad2D_Constant_Padding/1                  0.248 ms        0.248 ms         2826
            +OpenCV_Dilate2D_Constant_Padding/1                     0.136 ms        0.136 ms         5148
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html new file mode 100644 index 00000000..b06ad02c --- /dev/null +++ b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -0,0 +1,95 @@ + + + +

            imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:05:43 UTC

            +

            AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            + + + + + + + + + + + + + + + + + + + + + + +
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.914.91143
            MLIR_Conv2D/17.177.1798
            Buddy_Conv2D/10.310.312,260
            Buddy_Corr2D_Constant_Padding/10.7950.795875
            OpenCV_Filter2D_Constant_Padding/11.251.25560
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,871
            Buddy_Resize2D_Bilinear_Interpolation/10.2610.2612,651
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006690.00669104,620
            OpenCV_Resize2D_Bilinear_Interpolation/10.0140.01449,783
            Buddy_Erosion2D_Constant_Padding/10.2180.2183,101
            Buddy_Dilation2D_Constant_Padding/10.2140.2143,270
            Buddy_Opening2D_Constant_Padding/10.3190.3192,180
            Buddy_Closing2D_Constant_Padding/10.3120.3122,262
            Buddy_TopHat2D_Constant_Padding/10.8140.814841
            Buddy_BottomHat2D_Constant_Padding/10.820.82849
            OpenCV_Erode2D_Constant_Padding/10.1350.1355,157
            OpenCV_Opening2D_Constant_Padding/10.2190.2193,187
            OpenCV_Closing2D_Constant_Padding/10.2180.2183,207
            OpenCV_TopHat2D_Constant_Padding/10.2550.2552,745
            OpenCV_BottomHat2D_Constant_Padding/10.2530.2532,766
            OpenCV_MorphGrad2D_Constant_Padding/10.2490.2492,808
            OpenCV_Dilate2D_Constant_Padding/10.1370.1375,102
            +
            Console output +
            2025-06-01T10:06:40+00:00
            +Running ./bin/image-processing-benchmark
            +Run on (24 X 5100 MHz CPU s)
            +CPU Caches:
            +  L1 Data 48 KiB (x12)
            +  L1 Instruction 32 KiB (x12)
            +  L2 Unified 1280 KiB (x12)
            +  L3 Unified 30720 KiB (x1)
            +Load Average: 1.08, 1.24, 2.03
            +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            +--------------------------------------------------------------------------------------------
            +Benchmark                                                  Time             CPU   Iterations
            +--------------------------------------------------------------------------------------------
            +Eigen_Convolve2D/1                                      4.91 ms         4.91 ms          143
            +MLIR_Conv2D/1                                           7.17 ms         7.17 ms           98
            +Buddy_Conv2D/1                                         0.310 ms        0.310 ms         2260
            +Buddy_Corr2D_Constant_Padding/1                        0.795 ms        0.795 ms          875
            +OpenCV_Filter2D_Constant_Padding/1                      1.25 ms         1.25 ms          560
            +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4871
            +Buddy_Resize2D_Bilinear_Interpolation/1                0.261 ms        0.261 ms         2651
            +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       104620
            +OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49783
            +Buddy_Erosion2D_Constant_Padding/1                     0.218 ms        0.218 ms         3101
            +Buddy_Dilation2D_Constant_Padding/1                    0.214 ms        0.214 ms         3270
            +Buddy_Opening2D_Constant_Padding/1                     0.319 ms        0.319 ms         2180
            +Buddy_Closing2D_Constant_Padding/1                     0.312 ms        0.312 ms         2262
            +Buddy_TopHat2D_Constant_Padding/1                      0.814 ms        0.814 ms          841
            +Buddy_BottomHat2D_Constant_Padding/1                   0.820 ms        0.820 ms          849
            +OpenCV_Erode2D_Constant_Padding/1                      0.135 ms        0.135 ms         5157
            +OpenCV_Opening2D_Constant_Padding/1                    0.219 ms        0.219 ms         3187
            +OpenCV_Closing2D_Constant_Padding/1                    0.218 ms        0.218 ms         3207
            +OpenCV_TopHat2D_Constant_Padding/1                     0.255 ms        0.255 ms         2745
            +OpenCV_BottomHat2D_Constant_Padding/1                  0.253 ms        0.253 ms         2766
            +OpenCV_MorphGrad2D_Constant_Padding/1                  0.249 ms        0.249 ms         2808
            +OpenCV_Dilate2D_Constant_Padding/1                     0.137 ms        0.137 ms         5102
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html new file mode 100644 index 00000000..6a9399ab --- /dev/null +++ b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -0,0 +1,95 @@ + + + +

            imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:05:43 UTC

            +

            AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            + + + + + + + + + + + + + + + + + + + + + + +
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.611.660
            MLIR_Conv2D/129.229.224
            Buddy_Conv2D/11.311.31536
            Buddy_Corr2D_Constant_Padding/12.332.33300
            OpenCV_Filter2D_Constant_Padding/14.114.11170
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,854
            Buddy_Resize2D_Bilinear_Interpolation/10.2610.2612,689
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006650.00665105,080
            OpenCV_Resize2D_Bilinear_Interpolation/10.0140.01449,721
            Buddy_Erosion2D_Constant_Padding/10.2160.2163,235
            Buddy_Dilation2D_Constant_Padding/10.2140.2143,260
            Buddy_Opening2D_Constant_Padding/10.310.312,249
            Buddy_Closing2D_Constant_Padding/10.3120.3122,139
            Buddy_TopHat2D_Constant_Padding/10.780.78826
            Buddy_BottomHat2D_Constant_Padding/10.7820.782830
            OpenCV_Erode2D_Constant_Padding/10.1380.1385,049
            OpenCV_Opening2D_Constant_Padding/10.2260.2263,095
            OpenCV_Closing2D_Constant_Padding/10.2250.2253,109
            OpenCV_TopHat2D_Constant_Padding/10.260.262,690
            OpenCV_BottomHat2D_Constant_Padding/10.260.262,688
            OpenCV_MorphGrad2D_Constant_Padding/10.2530.2532,759
            OpenCV_Dilate2D_Constant_Padding/10.1360.1365,116
            +
            Console output +
            2025-06-01T10:07:04+00:00
            +Running ./bin/image-processing-benchmark
            +Run on (24 X 5100 MHz CPU s)
            +CPU Caches:
            +  L1 Data 48 KiB (x12)
            +  L1 Instruction 32 KiB (x12)
            +  L2 Unified 1280 KiB (x12)
            +  L3 Unified 30720 KiB (x1)
            +Load Average: 1.05, 1.22, 2.00
            +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            +--------------------------------------------------------------------------------------------
            +Benchmark                                                  Time             CPU   Iterations
            +--------------------------------------------------------------------------------------------
            +Eigen_Convolve2D/1                                      11.6 ms         11.6 ms           60
            +MLIR_Conv2D/1                                           29.2 ms         29.2 ms           24
            +Buddy_Conv2D/1                                          1.31 ms         1.31 ms          536
            +Buddy_Corr2D_Constant_Padding/1                         2.33 ms         2.33 ms          300
            +OpenCV_Filter2D_Constant_Padding/1                      4.11 ms         4.11 ms          170
            +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4854
            +Buddy_Resize2D_Bilinear_Interpolation/1                0.261 ms        0.261 ms         2689
            +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       105080
            +OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49721
            +Buddy_Erosion2D_Constant_Padding/1                     0.216 ms        0.216 ms         3235
            +Buddy_Dilation2D_Constant_Padding/1                    0.214 ms        0.214 ms         3260
            +Buddy_Opening2D_Constant_Padding/1                     0.310 ms        0.310 ms         2249
            +Buddy_Closing2D_Constant_Padding/1                     0.312 ms        0.312 ms         2139
            +Buddy_TopHat2D_Constant_Padding/1                      0.780 ms        0.780 ms          826
            +Buddy_BottomHat2D_Constant_Padding/1                   0.782 ms        0.782 ms          830
            +OpenCV_Erode2D_Constant_Padding/1                      0.138 ms        0.138 ms         5049
            +OpenCV_Opening2D_Constant_Padding/1                    0.226 ms        0.226 ms         3095
            +OpenCV_Closing2D_Constant_Padding/1                    0.225 ms        0.225 ms         3109
            +OpenCV_TopHat2D_Constant_Padding/1                     0.260 ms        0.260 ms         2690
            +OpenCV_BottomHat2D_Constant_Padding/1                  0.260 ms        0.260 ms         2688
            +OpenCV_MorphGrad2D_Constant_Padding/1                  0.253 ms        0.253 ms         2759
            +OpenCV_Dilate2D_Constant_Padding/1                     0.136 ms        0.136 ms         5116
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html new file mode 100644 index 00000000..88cebeb3 --- /dev/null +++ b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -0,0 +1,95 @@ + + + +

            imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:05:43 UTC

            +

            AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            + + + + + + + + + + + + + + + + + + + + + + +
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.511.560
            MLIR_Conv2D/129.129.124
            Buddy_Conv2D/11.381.38508
            Buddy_Corr2D_Constant_Padding/12.322.32301
            OpenCV_Filter2D_Constant_Padding/14.14.1170
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,857
            Buddy_Resize2D_Bilinear_Interpolation/10.2610.2612,686
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006660.00666105,064
            OpenCV_Resize2D_Bilinear_Interpolation/10.0140.01449,925
            Buddy_Erosion2D_Constant_Padding/10.2140.2143,267
            Buddy_Dilation2D_Constant_Padding/10.2140.2143,269
            Buddy_Opening2D_Constant_Padding/10.3160.3162,235
            Buddy_Closing2D_Constant_Padding/10.3150.3152,209
            Buddy_TopHat2D_Constant_Padding/10.8010.801841
            Buddy_BottomHat2D_Constant_Padding/10.7850.785852
            OpenCV_Erode2D_Constant_Padding/10.1360.1365,129
            OpenCV_Opening2D_Constant_Padding/10.2250.2253,105
            OpenCV_Closing2D_Constant_Padding/10.2270.2273,082
            OpenCV_TopHat2D_Constant_Padding/10.2610.2612,679
            OpenCV_BottomHat2D_Constant_Padding/10.2620.2622,672
            OpenCV_MorphGrad2D_Constant_Padding/10.2540.2542,751
            OpenCV_Dilate2D_Constant_Padding/10.1360.1365,094
            +
            Console output +
            2025-06-01T10:07:28+00:00
            +Running ./bin/image-processing-benchmark
            +Run on (24 X 5100 MHz CPU s)
            +CPU Caches:
            +  L1 Data 48 KiB (x12)
            +  L1 Instruction 32 KiB (x12)
            +  L2 Unified 1280 KiB (x12)
            +  L3 Unified 30720 KiB (x1)
            +Load Average: 1.03, 1.20, 1.98
            +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            +--------------------------------------------------------------------------------------------
            +Benchmark                                                  Time             CPU   Iterations
            +--------------------------------------------------------------------------------------------
            +Eigen_Convolve2D/1                                      11.5 ms         11.5 ms           60
            +MLIR_Conv2D/1                                           29.1 ms         29.1 ms           24
            +Buddy_Conv2D/1                                          1.38 ms         1.38 ms          508
            +Buddy_Corr2D_Constant_Padding/1                         2.32 ms         2.32 ms          301
            +OpenCV_Filter2D_Constant_Padding/1                      4.10 ms         4.10 ms          170
            +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4857
            +Buddy_Resize2D_Bilinear_Interpolation/1                0.261 ms        0.261 ms         2686
            +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       105064
            +OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49925
            +Buddy_Erosion2D_Constant_Padding/1                     0.214 ms        0.214 ms         3267
            +Buddy_Dilation2D_Constant_Padding/1                    0.214 ms        0.214 ms         3269
            +Buddy_Opening2D_Constant_Padding/1                     0.316 ms        0.316 ms         2235
            +Buddy_Closing2D_Constant_Padding/1                     0.315 ms        0.315 ms         2209
            +Buddy_TopHat2D_Constant_Padding/1                      0.801 ms        0.801 ms          841
            +Buddy_BottomHat2D_Constant_Padding/1                   0.785 ms        0.785 ms          852
            +OpenCV_Erode2D_Constant_Padding/1                      0.136 ms        0.136 ms         5129
            +OpenCV_Opening2D_Constant_Padding/1                    0.225 ms        0.225 ms         3105
            +OpenCV_Closing2D_Constant_Padding/1                    0.227 ms        0.227 ms         3082
            +OpenCV_TopHat2D_Constant_Padding/1                     0.261 ms        0.261 ms         2679
            +OpenCV_BottomHat2D_Constant_Padding/1                  0.262 ms        0.262 ms         2672
            +OpenCV_MorphGrad2D_Constant_Padding/1                  0.254 ms        0.254 ms         2751
            +OpenCV_Dilate2D_Constant_Padding/1                     0.136 ms        0.136 ms         5094
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html new file mode 100644 index 00000000..b95a586d --- /dev/null +++ b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -0,0 +1,95 @@ + + + +

            imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:05:43 UTC

            +

            AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            + + + + + + + + + + + + + + + + + + + + + + +
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/121.521.531
            MLIR_Conv2D/166.766.711
            Buddy_Conv2D/12.242.24312
            Buddy_Corr2D_Constant_Padding/14.674.67150
            OpenCV_Filter2D_Constant_Padding/18.618.6181
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,847
            Buddy_Resize2D_Bilinear_Interpolation/10.2610.2612,686
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006650.00665105,200
            OpenCV_Resize2D_Bilinear_Interpolation/10.01410.014149,717
            Buddy_Erosion2D_Constant_Padding/10.2130.2133,275
            Buddy_Dilation2D_Constant_Padding/10.2140.2143,279
            Buddy_Opening2D_Constant_Padding/10.310.312,249
            Buddy_Closing2D_Constant_Padding/10.3140.3142,267
            Buddy_TopHat2D_Constant_Padding/10.7890.789827
            Buddy_BottomHat2D_Constant_Padding/10.7630.763845
            OpenCV_Erode2D_Constant_Padding/10.1350.1355,188
            OpenCV_Opening2D_Constant_Padding/10.2290.2293,054
            OpenCV_Closing2D_Constant_Padding/10.2290.2293,052
            OpenCV_TopHat2D_Constant_Padding/10.2620.2622,667
            OpenCV_BottomHat2D_Constant_Padding/10.2620.2622,674
            OpenCV_MorphGrad2D_Constant_Padding/10.2540.2542,759
            OpenCV_Dilate2D_Constant_Padding/10.1370.1375,114
            +
            Console output +
            2025-06-01T10:07:52+00:00
            +Running ./bin/image-processing-benchmark
            +Run on (24 X 5100 MHz CPU s)
            +CPU Caches:
            +  L1 Data 48 KiB (x12)
            +  L1 Instruction 32 KiB (x12)
            +  L2 Unified 1280 KiB (x12)
            +  L3 Unified 30720 KiB (x1)
            +Load Average: 1.02, 1.19, 1.96
            +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            +--------------------------------------------------------------------------------------------
            +Benchmark                                                  Time             CPU   Iterations
            +--------------------------------------------------------------------------------------------
            +Eigen_Convolve2D/1                                      21.5 ms         21.5 ms           31
            +MLIR_Conv2D/1                                           66.7 ms         66.7 ms           11
            +Buddy_Conv2D/1                                          2.24 ms         2.24 ms          312
            +Buddy_Corr2D_Constant_Padding/1                         4.67 ms         4.67 ms          150
            +OpenCV_Filter2D_Constant_Padding/1                      8.61 ms         8.61 ms           81
            +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4847
            +Buddy_Resize2D_Bilinear_Interpolation/1                0.261 ms        0.261 ms         2686
            +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       105200
            +OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49717
            +Buddy_Erosion2D_Constant_Padding/1                     0.213 ms        0.213 ms         3275
            +Buddy_Dilation2D_Constant_Padding/1                    0.214 ms        0.214 ms         3279
            +Buddy_Opening2D_Constant_Padding/1                     0.310 ms        0.310 ms         2249
            +Buddy_Closing2D_Constant_Padding/1                     0.314 ms        0.314 ms         2267
            +Buddy_TopHat2D_Constant_Padding/1                      0.789 ms        0.789 ms          827
            +Buddy_BottomHat2D_Constant_Padding/1                   0.763 ms        0.763 ms          845
            +OpenCV_Erode2D_Constant_Padding/1                      0.135 ms        0.135 ms         5188
            +OpenCV_Opening2D_Constant_Padding/1                    0.229 ms        0.229 ms         3054
            +OpenCV_Closing2D_Constant_Padding/1                    0.229 ms        0.229 ms         3052
            +OpenCV_TopHat2D_Constant_Padding/1                     0.262 ms        0.262 ms         2667
            +OpenCV_BottomHat2D_Constant_Padding/1                  0.262 ms        0.262 ms         2674
            +OpenCV_MorphGrad2D_Constant_Padding/1                  0.254 ms        0.254 ms         2759
            +OpenCV_Dilate2D_Constant_Padding/1                     0.137 ms        0.137 ms         5114
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html new file mode 100644 index 00000000..c1d02dd5 --- /dev/null +++ b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -0,0 +1,95 @@ + + + +

            imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:05:43 UTC

            +

            AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            + + + + + + + + + + + + + + + + + + + + + + +
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/121.621.632
            MLIR_Conv2D/166.766.710
            Buddy_Conv2D/12.342.34299
            Buddy_Corr2D_Constant_Padding/14.674.67150
            OpenCV_Filter2D_Constant_Padding/18.68.681
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,853
            Buddy_Resize2D_Bilinear_Interpolation/10.260.262,693
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006660.00666105,142
            OpenCV_Resize2D_Bilinear_Interpolation/10.0140.01450,003
            Buddy_Erosion2D_Constant_Padding/10.2140.2143,229
            Buddy_Dilation2D_Constant_Padding/10.2170.2173,262
            Buddy_Opening2D_Constant_Padding/10.3080.3082,262
            Buddy_Closing2D_Constant_Padding/10.310.312,236
            Buddy_TopHat2D_Constant_Padding/10.7770.777855
            Buddy_BottomHat2D_Constant_Padding/10.7960.796826
            OpenCV_Erode2D_Constant_Padding/10.1360.1365,125
            OpenCV_Opening2D_Constant_Padding/10.2270.2273,079
            OpenCV_Closing2D_Constant_Padding/10.2260.2263,097
            OpenCV_TopHat2D_Constant_Padding/10.2610.2612,680
            OpenCV_BottomHat2D_Constant_Padding/10.260.262,694
            OpenCV_MorphGrad2D_Constant_Padding/10.2530.2532,766
            OpenCV_Dilate2D_Constant_Padding/10.140.144,993
            +
            Console output +
            2025-06-01T10:08:16+00:00
            +Running ./bin/image-processing-benchmark
            +Run on (24 X 5100 MHz CPU s)
            +CPU Caches:
            +  L1 Data 48 KiB (x12)
            +  L1 Instruction 32 KiB (x12)
            +  L2 Unified 1280 KiB (x12)
            +  L3 Unified 30720 KiB (x1)
            +Load Average: 1.02, 1.17, 1.93
            +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            +--------------------------------------------------------------------------------------------
            +Benchmark                                                  Time             CPU   Iterations
            +--------------------------------------------------------------------------------------------
            +Eigen_Convolve2D/1                                      21.6 ms         21.6 ms           32
            +MLIR_Conv2D/1                                           66.7 ms         66.7 ms           10
            +Buddy_Conv2D/1                                          2.34 ms         2.34 ms          299
            +Buddy_Corr2D_Constant_Padding/1                         4.67 ms         4.67 ms          150
            +OpenCV_Filter2D_Constant_Padding/1                      8.60 ms         8.60 ms           81
            +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4853
            +Buddy_Resize2D_Bilinear_Interpolation/1                0.260 ms        0.260 ms         2693
            +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       105142
            +OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        50003
            +Buddy_Erosion2D_Constant_Padding/1                     0.214 ms        0.214 ms         3229
            +Buddy_Dilation2D_Constant_Padding/1                    0.217 ms        0.217 ms         3262
            +Buddy_Opening2D_Constant_Padding/1                     0.308 ms        0.308 ms         2262
            +Buddy_Closing2D_Constant_Padding/1                     0.310 ms        0.310 ms         2236
            +Buddy_TopHat2D_Constant_Padding/1                      0.777 ms        0.777 ms          855
            +Buddy_BottomHat2D_Constant_Padding/1                   0.796 ms        0.796 ms          826
            +OpenCV_Erode2D_Constant_Padding/1                      0.136 ms        0.136 ms         5125
            +OpenCV_Opening2D_Constant_Padding/1                    0.227 ms        0.227 ms         3079
            +OpenCV_Closing2D_Constant_Padding/1                    0.226 ms        0.226 ms         3097
            +OpenCV_TopHat2D_Constant_Padding/1                     0.261 ms        0.261 ms         2680
            +OpenCV_BottomHat2D_Constant_Padding/1                  0.260 ms        0.260 ms         2694
            +OpenCV_MorphGrad2D_Constant_Padding/1                  0.253 ms        0.253 ms         2766
            +OpenCV_Dilate2D_Constant_Padding/1                     0.140 ms        0.140 ms         4993
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html new file mode 100644 index 00000000..b563ae9d --- /dev/null +++ b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -0,0 +1,95 @@ + + + +

            imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:05:43 UTC

            +

            AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            + + + + + + + + + + + + + + + + + + + + + + +
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/134.234.221
            MLIR_Conv2D/11191196
            Buddy_Conv2D/13.913.91179
            Buddy_Corr2D_Constant_Padding/17.797.7990
            OpenCV_Filter2D_Constant_Padding/15.895.89119
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1420.1424,837
            Buddy_Resize2D_Bilinear_Interpolation/10.260.262,692
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006650.00665105,099
            OpenCV_Resize2D_Bilinear_Interpolation/10.01420.014249,521
            Buddy_Erosion2D_Constant_Padding/10.2140.2143,257
            Buddy_Dilation2D_Constant_Padding/10.2160.2153,222
            Buddy_Opening2D_Constant_Padding/10.3290.3292,228
            Buddy_Closing2D_Constant_Padding/10.3140.3132,221
            Buddy_TopHat2D_Constant_Padding/10.7890.789845
            Buddy_BottomHat2D_Constant_Padding/10.7930.793825
            OpenCV_Erode2D_Constant_Padding/10.1370.1375,117
            OpenCV_Opening2D_Constant_Padding/10.220.223,176
            OpenCV_Closing2D_Constant_Padding/10.220.223,179
            OpenCV_TopHat2D_Constant_Padding/10.2540.2542,758
            OpenCV_BottomHat2D_Constant_Padding/10.2550.2552,740
            OpenCV_MorphGrad2D_Constant_Padding/10.2510.2512,779
            OpenCV_Dilate2D_Constant_Padding/10.1350.1355,176
            +
            Console output +
            2025-06-01T10:08:40+00:00
            +Running ./bin/image-processing-benchmark
            +Run on (24 X 5100 MHz CPU s)
            +CPU Caches:
            +  L1 Data 48 KiB (x12)
            +  L1 Instruction 32 KiB (x12)
            +  L2 Unified 1280 KiB (x12)
            +  L3 Unified 30720 KiB (x1)
            +Load Average: 1.01, 1.16, 1.90
            +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            +--------------------------------------------------------------------------------------------
            +Benchmark                                                  Time             CPU   Iterations
            +--------------------------------------------------------------------------------------------
            +Eigen_Convolve2D/1                                      34.2 ms         34.2 ms           21
            +MLIR_Conv2D/1                                            119 ms          119 ms            6
            +Buddy_Conv2D/1                                          3.91 ms         3.91 ms          179
            +Buddy_Corr2D_Constant_Padding/1                         7.79 ms         7.79 ms           90
            +OpenCV_Filter2D_Constant_Padding/1                      5.89 ms         5.89 ms          119
            +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.142 ms        0.142 ms         4837
            +Buddy_Resize2D_Bilinear_Interpolation/1                0.260 ms        0.260 ms         2692
            +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       105099
            +OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49521
            +Buddy_Erosion2D_Constant_Padding/1                     0.214 ms        0.214 ms         3257
            +Buddy_Dilation2D_Constant_Padding/1                    0.216 ms        0.215 ms         3222
            +Buddy_Opening2D_Constant_Padding/1                     0.329 ms        0.329 ms         2228
            +Buddy_Closing2D_Constant_Padding/1                     0.314 ms        0.313 ms         2221
            +Buddy_TopHat2D_Constant_Padding/1                      0.789 ms        0.789 ms          845
            +Buddy_BottomHat2D_Constant_Padding/1                   0.793 ms        0.793 ms          825
            +OpenCV_Erode2D_Constant_Padding/1                      0.137 ms        0.137 ms         5117
            +OpenCV_Opening2D_Constant_Padding/1                    0.220 ms        0.220 ms         3176
            +OpenCV_Closing2D_Constant_Padding/1                    0.220 ms        0.220 ms         3179
            +OpenCV_TopHat2D_Constant_Padding/1                     0.254 ms        0.254 ms         2758
            +OpenCV_BottomHat2D_Constant_Padding/1                  0.255 ms        0.255 ms         2740
            +OpenCV_MorphGrad2D_Constant_Padding/1                  0.251 ms        0.251 ms         2779
            +OpenCV_Dilate2D_Constant_Padding/1                     0.135 ms        0.135 ms         5176
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html new file mode 100644 index 00000000..3821bcb4 --- /dev/null +++ b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -0,0 +1,95 @@ + + + +

            imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:05:43 UTC

            +

            AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            + + + + + + + + + + + + + + + + + + + + + + +
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/134.334.320
            MLIR_Conv2D/11191196
            Buddy_Conv2D/13.983.98176
            Buddy_Corr2D_Constant_Padding/17.87.890
            OpenCV_Filter2D_Constant_Padding/15.895.89119
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,830
            Buddy_Resize2D_Bilinear_Interpolation/10.2610.2612,690
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006660.00666105,110
            OpenCV_Resize2D_Bilinear_Interpolation/10.01420.014249,449
            Buddy_Erosion2D_Constant_Padding/10.2140.2143,196
            Buddy_Dilation2D_Constant_Padding/10.2140.2143,263
            Buddy_Opening2D_Constant_Padding/10.3130.3132,208
            Buddy_Closing2D_Constant_Padding/10.3270.3272,187
            Buddy_TopHat2D_Constant_Padding/10.8030.803835
            Buddy_BottomHat2D_Constant_Padding/10.7980.798832
            OpenCV_Erode2D_Constant_Padding/10.1380.1385,076
            OpenCV_Opening2D_Constant_Padding/10.2290.2293,051
            OpenCV_Closing2D_Constant_Padding/10.230.233,037
            OpenCV_TopHat2D_Constant_Padding/10.2630.2632,671
            OpenCV_BottomHat2D_Constant_Padding/10.2630.2632,661
            OpenCV_MorphGrad2D_Constant_Padding/10.2570.2572,719
            OpenCV_Dilate2D_Constant_Padding/10.1360.1365,132
            +
            Console output +
            2025-06-01T10:09:04+00:00
            +Running ./bin/image-processing-benchmark
            +Run on (24 X 5100 MHz CPU s)
            +CPU Caches:
            +  L1 Data 48 KiB (x12)
            +  L1 Instruction 32 KiB (x12)
            +  L2 Unified 1280 KiB (x12)
            +  L3 Unified 30720 KiB (x1)
            +Load Average: 1.00, 1.14, 1.88
            +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            +--------------------------------------------------------------------------------------------
            +Benchmark                                                  Time             CPU   Iterations
            +--------------------------------------------------------------------------------------------
            +Eigen_Convolve2D/1                                      34.3 ms         34.3 ms           20
            +MLIR_Conv2D/1                                            119 ms          119 ms            6
            +Buddy_Conv2D/1                                          3.98 ms         3.98 ms          176
            +Buddy_Corr2D_Constant_Padding/1                         7.80 ms         7.80 ms           90
            +OpenCV_Filter2D_Constant_Padding/1                      5.89 ms         5.89 ms          119
            +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4830
            +Buddy_Resize2D_Bilinear_Interpolation/1                0.261 ms        0.261 ms         2690
            +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       105110
            +OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49449
            +Buddy_Erosion2D_Constant_Padding/1                     0.214 ms        0.214 ms         3196
            +Buddy_Dilation2D_Constant_Padding/1                    0.214 ms        0.214 ms         3263
            +Buddy_Opening2D_Constant_Padding/1                     0.313 ms        0.313 ms         2208
            +Buddy_Closing2D_Constant_Padding/1                     0.327 ms        0.327 ms         2187
            +Buddy_TopHat2D_Constant_Padding/1                      0.803 ms        0.803 ms          835
            +Buddy_BottomHat2D_Constant_Padding/1                   0.798 ms        0.798 ms          832
            +OpenCV_Erode2D_Constant_Padding/1                      0.138 ms        0.138 ms         5076
            +OpenCV_Opening2D_Constant_Padding/1                    0.229 ms        0.229 ms         3051
            +OpenCV_Closing2D_Constant_Padding/1                    0.230 ms        0.230 ms         3037
            +OpenCV_TopHat2D_Constant_Padding/1                     0.263 ms        0.263 ms         2671
            +OpenCV_BottomHat2D_Constant_Padding/1                  0.263 ms        0.263 ms         2661
            +OpenCV_MorphGrad2D_Constant_Padding/1                  0.257 ms        0.257 ms         2719
            +OpenCV_Dilate2D_Constant_Padding/1                     0.136 ms        0.136 ms         5132
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html new file mode 100644 index 00000000..86b9fc22 --- /dev/null +++ b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -0,0 +1,95 @@ + + + +

            imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:05:43 UTC

            +

            SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            + + + + + + + + + + + + + + + + + + + + + + +
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.744.74148
            MLIR_Conv2D/17.27.297
            Buddy_Conv2D/10.7050.705994
            Buddy_Corr2D_Constant_Padding/11.071.07652
            OpenCV_Filter2D_Constant_Padding/11.861.86376
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,854
            Buddy_Resize2D_Bilinear_Interpolation/10.260.262,692
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006660.00666105,153
            OpenCV_Resize2D_Bilinear_Interpolation/10.0140.01449,887
            Buddy_Erosion2D_Constant_Padding/10.2160.2163,272
            Buddy_Dilation2D_Constant_Padding/10.2150.2153,249
            Buddy_Opening2D_Constant_Padding/10.3110.3112,259
            Buddy_Closing2D_Constant_Padding/10.3070.3072,229
            Buddy_TopHat2D_Constant_Padding/10.7770.777858
            Buddy_BottomHat2D_Constant_Padding/10.7670.767831
            OpenCV_Erode2D_Constant_Padding/10.1360.1365,114
            OpenCV_Opening2D_Constant_Padding/10.2230.2233,131
            OpenCV_Closing2D_Constant_Padding/10.2220.2223,149
            OpenCV_TopHat2D_Constant_Padding/10.2550.2552,744
            OpenCV_BottomHat2D_Constant_Padding/10.2560.2562,738
            OpenCV_MorphGrad2D_Constant_Padding/10.2480.2482,822
            OpenCV_Dilate2D_Constant_Padding/10.1370.1375,112
            +
            Console output +
            2025-06-01T10:03:44+00:00
            +Running ./bin/image-processing-benchmark
            +Run on (24 X 5100 MHz CPU s)
            +CPU Caches:
            +  L1 Data 48 KiB (x12)
            +  L1 Instruction 32 KiB (x12)
            +  L2 Unified 1280 KiB (x12)
            +  L3 Unified 30720 KiB (x1)
            +Load Average: 1.01, 1.35, 2.23
            +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            +--------------------------------------------------------------------------------------------
            +Benchmark                                                  Time             CPU   Iterations
            +--------------------------------------------------------------------------------------------
            +Eigen_Convolve2D/1                                      4.74 ms         4.74 ms          148
            +MLIR_Conv2D/1                                           7.20 ms         7.20 ms           97
            +Buddy_Conv2D/1                                         0.705 ms        0.705 ms          994
            +Buddy_Corr2D_Constant_Padding/1                         1.07 ms         1.07 ms          652
            +OpenCV_Filter2D_Constant_Padding/1                      1.86 ms         1.86 ms          376
            +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4854
            +Buddy_Resize2D_Bilinear_Interpolation/1                0.260 ms        0.260 ms         2692
            +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       105153
            +OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49887
            +Buddy_Erosion2D_Constant_Padding/1                     0.216 ms        0.216 ms         3272
            +Buddy_Dilation2D_Constant_Padding/1                    0.215 ms        0.215 ms         3249
            +Buddy_Opening2D_Constant_Padding/1                     0.311 ms        0.311 ms         2259
            +Buddy_Closing2D_Constant_Padding/1                     0.307 ms        0.307 ms         2229
            +Buddy_TopHat2D_Constant_Padding/1                      0.777 ms        0.777 ms          858
            +Buddy_BottomHat2D_Constant_Padding/1                   0.767 ms        0.767 ms          831
            +OpenCV_Erode2D_Constant_Padding/1                      0.136 ms        0.136 ms         5114
            +OpenCV_Opening2D_Constant_Padding/1                    0.223 ms        0.223 ms         3131
            +OpenCV_Closing2D_Constant_Padding/1                    0.222 ms        0.222 ms         3149
            +OpenCV_TopHat2D_Constant_Padding/1                     0.255 ms        0.255 ms         2744
            +OpenCV_BottomHat2D_Constant_Padding/1                  0.256 ms        0.256 ms         2738
            +OpenCV_MorphGrad2D_Constant_Padding/1                  0.248 ms        0.248 ms         2822
            +OpenCV_Dilate2D_Constant_Padding/1                     0.137 ms        0.137 ms         5112
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html new file mode 100644 index 00000000..a11ce7f4 --- /dev/null +++ b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -0,0 +1,95 @@ + + + +

            imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:05:43 UTC

            +

            SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            + + + + + + + + + + + + + + + + + + + + + + +
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.884.88144
            MLIR_Conv2D/17.217.2197
            Buddy_Conv2D/10.7070.707988
            Buddy_Corr2D_Constant_Padding/11.051.05668
            OpenCV_Filter2D_Constant_Padding/11.861.86376
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,847
            Buddy_Resize2D_Bilinear_Interpolation/10.2610.2612,676
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006660.00666104,914
            OpenCV_Resize2D_Bilinear_Interpolation/10.0140.01449,862
            Buddy_Erosion2D_Constant_Padding/10.2130.2133,188
            Buddy_Dilation2D_Constant_Padding/10.2160.2163,259
            Buddy_Opening2D_Constant_Padding/10.3170.3172,184
            Buddy_Closing2D_Constant_Padding/10.3140.3142,136
            Buddy_TopHat2D_Constant_Padding/10.7860.786814
            Buddy_BottomHat2D_Constant_Padding/10.7990.799847
            OpenCV_Erode2D_Constant_Padding/10.1390.1395,040
            OpenCV_Opening2D_Constant_Padding/10.2210.2213,163
            OpenCV_Closing2D_Constant_Padding/10.2190.2193,197
            OpenCV_TopHat2D_Constant_Padding/10.2550.2552,741
            OpenCV_BottomHat2D_Constant_Padding/10.2560.2562,735
            OpenCV_MorphGrad2D_Constant_Padding/10.2480.2482,817
            OpenCV_Dilate2D_Constant_Padding/10.1370.1375,127
            +
            Console output +
            2025-06-01T10:04:08+00:00
            +Running ./bin/image-processing-benchmark
            +Run on (24 X 5100 MHz CPU s)
            +CPU Caches:
            +  L1 Data 48 KiB (x12)
            +  L1 Instruction 32 KiB (x12)
            +  L2 Unified 1280 KiB (x12)
            +  L3 Unified 30720 KiB (x1)
            +Load Average: 1.01, 1.32, 2.20
            +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            +--------------------------------------------------------------------------------------------
            +Benchmark                                                  Time             CPU   Iterations
            +--------------------------------------------------------------------------------------------
            +Eigen_Convolve2D/1                                      4.88 ms         4.88 ms          144
            +MLIR_Conv2D/1                                           7.21 ms         7.21 ms           97
            +Buddy_Conv2D/1                                         0.707 ms        0.707 ms          988
            +Buddy_Corr2D_Constant_Padding/1                         1.05 ms         1.05 ms          668
            +OpenCV_Filter2D_Constant_Padding/1                      1.86 ms         1.86 ms          376
            +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4847
            +Buddy_Resize2D_Bilinear_Interpolation/1                0.261 ms        0.261 ms         2676
            +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       104914
            +OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49862
            +Buddy_Erosion2D_Constant_Padding/1                     0.213 ms        0.213 ms         3188
            +Buddy_Dilation2D_Constant_Padding/1                    0.216 ms        0.216 ms         3259
            +Buddy_Opening2D_Constant_Padding/1                     0.317 ms        0.317 ms         2184
            +Buddy_Closing2D_Constant_Padding/1                     0.314 ms        0.314 ms         2136
            +Buddy_TopHat2D_Constant_Padding/1                      0.786 ms        0.786 ms          814
            +Buddy_BottomHat2D_Constant_Padding/1                   0.799 ms        0.799 ms          847
            +OpenCV_Erode2D_Constant_Padding/1                      0.139 ms        0.139 ms         5040
            +OpenCV_Opening2D_Constant_Padding/1                    0.221 ms        0.221 ms         3163
            +OpenCV_Closing2D_Constant_Padding/1                    0.219 ms        0.219 ms         3197
            +OpenCV_TopHat2D_Constant_Padding/1                     0.255 ms        0.255 ms         2741
            +OpenCV_BottomHat2D_Constant_Padding/1                  0.256 ms        0.256 ms         2735
            +OpenCV_MorphGrad2D_Constant_Padding/1                  0.248 ms        0.248 ms         2817
            +OpenCV_Dilate2D_Constant_Padding/1                     0.137 ms        0.137 ms         5127
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html new file mode 100644 index 00000000..8585342c --- /dev/null +++ b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -0,0 +1,95 @@ + + + +

            imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:05:43 UTC

            +

            SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            + + + + + + + + + + + + + + + + + + + + + + +
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.511.560
            MLIR_Conv2D/129.129.124
            Buddy_Conv2D/12.042.04343
            Buddy_Corr2D_Constant_Padding/11.741.74400
            OpenCV_Filter2D_Constant_Padding/12.682.68261
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,858
            Buddy_Resize2D_Bilinear_Interpolation/10.2610.2612,687
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006670.00667104,992
            OpenCV_Resize2D_Bilinear_Interpolation/10.0140.01449,870
            Buddy_Erosion2D_Constant_Padding/10.2160.2163,259
            Buddy_Dilation2D_Constant_Padding/10.2160.2163,239
            Buddy_Opening2D_Constant_Padding/10.320.322,223
            Buddy_Closing2D_Constant_Padding/10.3080.3082,211
            Buddy_TopHat2D_Constant_Padding/10.7910.791836
            Buddy_BottomHat2D_Constant_Padding/10.8060.805841
            OpenCV_Erode2D_Constant_Padding/10.1380.1385,076
            OpenCV_Opening2D_Constant_Padding/10.2270.2273,086
            OpenCV_Closing2D_Constant_Padding/10.2250.2253,114
            OpenCV_TopHat2D_Constant_Padding/10.2640.2642,653
            OpenCV_BottomHat2D_Constant_Padding/10.2620.2622,674
            OpenCV_MorphGrad2D_Constant_Padding/10.2550.2552,741
            OpenCV_Dilate2D_Constant_Padding/10.1380.1385,067
            +
            Console output +
            2025-06-01T10:04:31+00:00
            +Running ./bin/image-processing-benchmark
            +Run on (24 X 5100 MHz CPU s)
            +CPU Caches:
            +  L1 Data 48 KiB (x12)
            +  L1 Instruction 32 KiB (x12)
            +  L2 Unified 1280 KiB (x12)
            +  L3 Unified 30720 KiB (x1)
            +Load Average: 1.00, 1.30, 2.17
            +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            +--------------------------------------------------------------------------------------------
            +Benchmark                                                  Time             CPU   Iterations
            +--------------------------------------------------------------------------------------------
            +Eigen_Convolve2D/1                                      11.5 ms         11.5 ms           60
            +MLIR_Conv2D/1                                           29.1 ms         29.1 ms           24
            +Buddy_Conv2D/1                                          2.04 ms         2.04 ms          343
            +Buddy_Corr2D_Constant_Padding/1                         1.74 ms         1.74 ms          400
            +OpenCV_Filter2D_Constant_Padding/1                      2.68 ms         2.68 ms          261
            +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4858
            +Buddy_Resize2D_Bilinear_Interpolation/1                0.261 ms        0.261 ms         2687
            +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       104992
            +OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49870
            +Buddy_Erosion2D_Constant_Padding/1                     0.216 ms        0.216 ms         3259
            +Buddy_Dilation2D_Constant_Padding/1                    0.216 ms        0.216 ms         3239
            +Buddy_Opening2D_Constant_Padding/1                     0.320 ms        0.320 ms         2223
            +Buddy_Closing2D_Constant_Padding/1                     0.308 ms        0.308 ms         2211
            +Buddy_TopHat2D_Constant_Padding/1                      0.791 ms        0.791 ms          836
            +Buddy_BottomHat2D_Constant_Padding/1                   0.806 ms        0.805 ms          841
            +OpenCV_Erode2D_Constant_Padding/1                      0.138 ms        0.138 ms         5076
            +OpenCV_Opening2D_Constant_Padding/1                    0.227 ms        0.227 ms         3086
            +OpenCV_Closing2D_Constant_Padding/1                    0.225 ms        0.225 ms         3114
            +OpenCV_TopHat2D_Constant_Padding/1                     0.264 ms        0.264 ms         2653
            +OpenCV_BottomHat2D_Constant_Padding/1                  0.262 ms        0.262 ms         2674
            +OpenCV_MorphGrad2D_Constant_Padding/1                  0.255 ms        0.255 ms         2741
            +OpenCV_Dilate2D_Constant_Padding/1                     0.138 ms        0.138 ms         5067
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html new file mode 100644 index 00000000..4fb7e607 --- /dev/null +++ b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -0,0 +1,95 @@ + + + +

            imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:05:43 UTC

            +

            SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            + + + + + + + + + + + + + + + + + + + + + + +
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.611.661
            MLIR_Conv2D/1292924
            Buddy_Conv2D/12.082.08337
            Buddy_Corr2D_Constant_Padding/11.751.75399
            OpenCV_Filter2D_Constant_Padding/12.682.68261
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,856
            Buddy_Resize2D_Bilinear_Interpolation/10.2610.2612,688
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006690.00669104,682
            OpenCV_Resize2D_Bilinear_Interpolation/10.01410.014149,744
            Buddy_Erosion2D_Constant_Padding/10.2190.2193,240
            Buddy_Dilation2D_Constant_Padding/10.2150.2153,213
            Buddy_Opening2D_Constant_Padding/10.3080.3082,240
            Buddy_Closing2D_Constant_Padding/10.3090.3092,269
            Buddy_TopHat2D_Constant_Padding/10.820.82841
            Buddy_BottomHat2D_Constant_Padding/10.80.8846
            OpenCV_Erode2D_Constant_Padding/10.1380.1385,072
            OpenCV_Opening2D_Constant_Padding/10.2230.2233,139
            OpenCV_Closing2D_Constant_Padding/10.2280.2283,074
            OpenCV_TopHat2D_Constant_Padding/10.2610.2612,680
            OpenCV_BottomHat2D_Constant_Padding/10.2620.2622,676
            OpenCV_MorphGrad2D_Constant_Padding/10.2540.2542,755
            OpenCV_Dilate2D_Constant_Padding/10.1350.1355,189
            +
            Console output +
            2025-06-01T10:04:55+00:00
            +Running ./bin/image-processing-benchmark
            +Run on (24 X 5100 MHz CPU s)
            +CPU Caches:
            +  L1 Data 48 KiB (x12)
            +  L1 Instruction 32 KiB (x12)
            +  L2 Unified 1280 KiB (x12)
            +  L3 Unified 30720 KiB (x1)
            +Load Average: 1.06, 1.29, 2.15
            +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            +--------------------------------------------------------------------------------------------
            +Benchmark                                                  Time             CPU   Iterations
            +--------------------------------------------------------------------------------------------
            +Eigen_Convolve2D/1                                      11.6 ms         11.6 ms           61
            +MLIR_Conv2D/1                                           29.0 ms         29.0 ms           24
            +Buddy_Conv2D/1                                          2.08 ms         2.08 ms          337
            +Buddy_Corr2D_Constant_Padding/1                         1.75 ms         1.75 ms          399
            +OpenCV_Filter2D_Constant_Padding/1                      2.68 ms         2.68 ms          261
            +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4856
            +Buddy_Resize2D_Bilinear_Interpolation/1                0.261 ms        0.261 ms         2688
            +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       104682
            +OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49744
            +Buddy_Erosion2D_Constant_Padding/1                     0.219 ms        0.219 ms         3240
            +Buddy_Dilation2D_Constant_Padding/1                    0.215 ms        0.215 ms         3213
            +Buddy_Opening2D_Constant_Padding/1                     0.308 ms        0.308 ms         2240
            +Buddy_Closing2D_Constant_Padding/1                     0.309 ms        0.309 ms         2269
            +Buddy_TopHat2D_Constant_Padding/1                      0.820 ms        0.820 ms          841
            +Buddy_BottomHat2D_Constant_Padding/1                   0.800 ms        0.800 ms          846
            +OpenCV_Erode2D_Constant_Padding/1                      0.138 ms        0.138 ms         5072
            +OpenCV_Opening2D_Constant_Padding/1                    0.223 ms        0.223 ms         3139
            +OpenCV_Closing2D_Constant_Padding/1                    0.228 ms        0.228 ms         3074
            +OpenCV_TopHat2D_Constant_Padding/1                     0.261 ms        0.261 ms         2680
            +OpenCV_BottomHat2D_Constant_Padding/1                  0.262 ms        0.262 ms         2676
            +OpenCV_MorphGrad2D_Constant_Padding/1                  0.254 ms        0.254 ms         2755
            +OpenCV_Dilate2D_Constant_Padding/1                     0.135 ms        0.135 ms         5189
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html new file mode 100644 index 00000000..19d8035f --- /dev/null +++ b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -0,0 +1,95 @@ + + + +

            imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:05:43 UTC

            +

            SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            + + + + + + + + + + + + + + + + + + + + + + +
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/15.035.03139
            MLIR_Conv2D/17.387.3895
            Buddy_Conv2D/10.5220.5211,363
            Buddy_Corr2D_Constant_Padding/10.8140.814865
            OpenCV_Filter2D_Constant_Padding/11.281.28548
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1460.1464,815
            Buddy_Resize2D_Bilinear_Interpolation/10.2670.2672,628
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006810.00681102,923
            OpenCV_Resize2D_Bilinear_Interpolation/10.01430.014348,919
            Buddy_Erosion2D_Constant_Padding/10.2220.2223,130
            Buddy_Dilation2D_Constant_Padding/10.220.223,158
            Buddy_Opening2D_Constant_Padding/10.3340.3342,177
            Buddy_Closing2D_Constant_Padding/10.3380.3382,114
            Buddy_TopHat2D_Constant_Padding/10.8930.893734
            Buddy_BottomHat2D_Constant_Padding/10.8990.899761
            OpenCV_Erode2D_Constant_Padding/10.1390.1395,020
            OpenCV_Opening2D_Constant_Padding/10.2170.2173,210
            OpenCV_Closing2D_Constant_Padding/10.220.223,180
            OpenCV_TopHat2D_Constant_Padding/10.2550.2552,639
            OpenCV_BottomHat2D_Constant_Padding/10.2560.2562,732
            OpenCV_MorphGrad2D_Constant_Padding/10.2470.2472,827
            OpenCV_Dilate2D_Constant_Padding/10.1360.1365,123
            +
            Console output +
            2025-06-01T09:59:45+00:00
            +Running ./bin/image-processing-benchmark
            +Run on (24 X 5100 MHz CPU s)
            +CPU Caches:
            +  L1 Data 48 KiB (x12)
            +  L1 Instruction 32 KiB (x12)
            +  L2 Unified 1280 KiB (x12)
            +  L3 Unified 30720 KiB (x1)
            +Load Average: 1.45, 1.75, 2.60
            +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            +--------------------------------------------------------------------------------------------
            +Benchmark                                                  Time             CPU   Iterations
            +--------------------------------------------------------------------------------------------
            +Eigen_Convolve2D/1                                      5.03 ms         5.03 ms          139
            +MLIR_Conv2D/1                                           7.38 ms         7.38 ms           95
            +Buddy_Conv2D/1                                         0.522 ms        0.521 ms         1363
            +Buddy_Corr2D_Constant_Padding/1                        0.814 ms        0.814 ms          865
            +OpenCV_Filter2D_Constant_Padding/1                      1.28 ms         1.28 ms          548
            +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.146 ms        0.146 ms         4815
            +Buddy_Resize2D_Bilinear_Interpolation/1                0.267 ms        0.267 ms         2628
            +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       102923
            +OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        48919
            +Buddy_Erosion2D_Constant_Padding/1                     0.222 ms        0.222 ms         3130
            +Buddy_Dilation2D_Constant_Padding/1                    0.220 ms        0.220 ms         3158
            +Buddy_Opening2D_Constant_Padding/1                     0.334 ms        0.334 ms         2177
            +Buddy_Closing2D_Constant_Padding/1                     0.338 ms        0.338 ms         2114
            +Buddy_TopHat2D_Constant_Padding/1                      0.893 ms        0.893 ms          734
            +Buddy_BottomHat2D_Constant_Padding/1                   0.899 ms        0.899 ms          761
            +OpenCV_Erode2D_Constant_Padding/1                      0.139 ms        0.139 ms         5020
            +OpenCV_Opening2D_Constant_Padding/1                    0.217 ms        0.217 ms         3210
            +OpenCV_Closing2D_Constant_Padding/1                    0.220 ms        0.220 ms         3180
            +OpenCV_TopHat2D_Constant_Padding/1                     0.255 ms        0.255 ms         2639
            +OpenCV_BottomHat2D_Constant_Padding/1                  0.256 ms        0.256 ms         2732
            +OpenCV_MorphGrad2D_Constant_Padding/1                  0.247 ms        0.247 ms         2827
            +OpenCV_Dilate2D_Constant_Padding/1                     0.136 ms        0.136 ms         5123
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html new file mode 100644 index 00000000..0873c54b --- /dev/null +++ b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -0,0 +1,95 @@ + + + +

            imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:05:43 UTC

            +

            SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            + + + + + + + + + + + + + + + + + + + + + + +
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.894.89143
            MLIR_Conv2D/17.197.1997
            Buddy_Conv2D/10.5240.5241,337
            Buddy_Corr2D_Constant_Padding/10.7920.792882
            OpenCV_Filter2D_Constant_Padding/11.251.25561
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,818
            Buddy_Resize2D_Bilinear_Interpolation/10.2610.262,683
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006690.00669104,687
            OpenCV_Resize2D_Bilinear_Interpolation/10.0140.01449,857
            Buddy_Erosion2D_Constant_Padding/10.2210.2213,251
            Buddy_Dilation2D_Constant_Padding/10.2140.2143,237
            Buddy_Opening2D_Constant_Padding/10.3230.3232,241
            Buddy_Closing2D_Constant_Padding/10.3080.3082,271
            Buddy_TopHat2D_Constant_Padding/10.8050.805841
            Buddy_BottomHat2D_Constant_Padding/10.8090.809846
            OpenCV_Erode2D_Constant_Padding/10.1360.1365,105
            OpenCV_Opening2D_Constant_Padding/10.2170.2173,219
            OpenCV_Closing2D_Constant_Padding/10.2170.2173,216
            OpenCV_TopHat2D_Constant_Padding/10.2580.2582,710
            OpenCV_BottomHat2D_Constant_Padding/10.2560.2562,740
            OpenCV_MorphGrad2D_Constant_Padding/10.250.252,803
            OpenCV_Dilate2D_Constant_Padding/10.1370.1375,105
            +
            Console output +
            2025-06-01T10:00:09+00:00
            +Running ./bin/image-processing-benchmark
            +Run on (24 X 5100 MHz CPU s)
            +CPU Caches:
            +  L1 Data 48 KiB (x12)
            +  L1 Instruction 32 KiB (x12)
            +  L2 Unified 1280 KiB (x12)
            +  L3 Unified 30720 KiB (x1)
            +Load Average: 1.48, 1.73, 2.57
            +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            +--------------------------------------------------------------------------------------------
            +Benchmark                                                  Time             CPU   Iterations
            +--------------------------------------------------------------------------------------------
            +Eigen_Convolve2D/1                                      4.89 ms         4.89 ms          143
            +MLIR_Conv2D/1                                           7.19 ms         7.19 ms           97
            +Buddy_Conv2D/1                                         0.524 ms        0.524 ms         1337
            +Buddy_Corr2D_Constant_Padding/1                        0.792 ms        0.792 ms          882
            +OpenCV_Filter2D_Constant_Padding/1                      1.25 ms         1.25 ms          561
            +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4818
            +Buddy_Resize2D_Bilinear_Interpolation/1                0.261 ms        0.260 ms         2683
            +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       104687
            +OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49857
            +Buddy_Erosion2D_Constant_Padding/1                     0.221 ms        0.221 ms         3251
            +Buddy_Dilation2D_Constant_Padding/1                    0.214 ms        0.214 ms         3237
            +Buddy_Opening2D_Constant_Padding/1                     0.323 ms        0.323 ms         2241
            +Buddy_Closing2D_Constant_Padding/1                     0.308 ms        0.308 ms         2271
            +Buddy_TopHat2D_Constant_Padding/1                      0.805 ms        0.805 ms          841
            +Buddy_BottomHat2D_Constant_Padding/1                   0.809 ms        0.809 ms          846
            +OpenCV_Erode2D_Constant_Padding/1                      0.136 ms        0.136 ms         5105
            +OpenCV_Opening2D_Constant_Padding/1                    0.217 ms        0.217 ms         3219
            +OpenCV_Closing2D_Constant_Padding/1                    0.217 ms        0.217 ms         3216
            +OpenCV_TopHat2D_Constant_Padding/1                     0.258 ms        0.258 ms         2710
            +OpenCV_BottomHat2D_Constant_Padding/1                  0.256 ms        0.256 ms         2740
            +OpenCV_MorphGrad2D_Constant_Padding/1                  0.250 ms        0.250 ms         2803
            +OpenCV_Dilate2D_Constant_Padding/1                     0.137 ms        0.137 ms         5105
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html new file mode 100644 index 00000000..11997445 --- /dev/null +++ b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -0,0 +1,95 @@ + + + +

            imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:05:43 UTC

            +

            SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            + + + + + + + + + + + + + + + + + + + + + + +
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.894.89143
            MLIR_Conv2D/17.197.1997
            Buddy_Conv2D/10.5230.5231,334
            Buddy_Corr2D_Constant_Padding/10.7930.793882
            OpenCV_Filter2D_Constant_Padding/11.251.25561
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,847
            Buddy_Resize2D_Bilinear_Interpolation/10.260.262,679
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006680.00668104,555
            OpenCV_Resize2D_Bilinear_Interpolation/10.0140.01449,894
            Buddy_Erosion2D_Constant_Padding/10.2140.2143,270
            Buddy_Dilation2D_Constant_Padding/10.2140.2143,269
            Buddy_Opening2D_Constant_Padding/10.320.322,113
            Buddy_Closing2D_Constant_Padding/10.3060.3062,219
            Buddy_TopHat2D_Constant_Padding/10.7810.781863
            Buddy_BottomHat2D_Constant_Padding/10.7950.795836
            OpenCV_Erode2D_Constant_Padding/10.1350.1355,175
            OpenCV_Opening2D_Constant_Padding/10.2170.2173,222
            OpenCV_Closing2D_Constant_Padding/10.2220.2223,153
            OpenCV_TopHat2D_Constant_Padding/10.2570.2572,720
            OpenCV_BottomHat2D_Constant_Padding/10.2570.2572,721
            OpenCV_MorphGrad2D_Constant_Padding/10.250.252,805
            OpenCV_Dilate2D_Constant_Padding/10.1370.1375,121
            +
            Console output +
            2025-06-01T10:00:33+00:00
            +Running ./bin/image-processing-benchmark
            +Run on (24 X 5100 MHz CPU s)
            +CPU Caches:
            +  L1 Data 48 KiB (x12)
            +  L1 Instruction 32 KiB (x12)
            +  L2 Unified 1280 KiB (x12)
            +  L3 Unified 30720 KiB (x1)
            +Load Average: 1.32, 1.67, 2.52
            +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            +--------------------------------------------------------------------------------------------
            +Benchmark                                                  Time             CPU   Iterations
            +--------------------------------------------------------------------------------------------
            +Eigen_Convolve2D/1                                      4.89 ms         4.89 ms          143
            +MLIR_Conv2D/1                                           7.19 ms         7.19 ms           97
            +Buddy_Conv2D/1                                         0.523 ms        0.523 ms         1334
            +Buddy_Corr2D_Constant_Padding/1                        0.793 ms        0.793 ms          882
            +OpenCV_Filter2D_Constant_Padding/1                      1.25 ms         1.25 ms          561
            +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4847
            +Buddy_Resize2D_Bilinear_Interpolation/1                0.260 ms        0.260 ms         2679
            +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       104555
            +OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49894
            +Buddy_Erosion2D_Constant_Padding/1                     0.214 ms        0.214 ms         3270
            +Buddy_Dilation2D_Constant_Padding/1                    0.214 ms        0.214 ms         3269
            +Buddy_Opening2D_Constant_Padding/1                     0.320 ms        0.320 ms         2113
            +Buddy_Closing2D_Constant_Padding/1                     0.306 ms        0.306 ms         2219
            +Buddy_TopHat2D_Constant_Padding/1                      0.781 ms        0.781 ms          863
            +Buddy_BottomHat2D_Constant_Padding/1                   0.795 ms        0.795 ms          836
            +OpenCV_Erode2D_Constant_Padding/1                      0.135 ms        0.135 ms         5175
            +OpenCV_Opening2D_Constant_Padding/1                    0.217 ms        0.217 ms         3222
            +OpenCV_Closing2D_Constant_Padding/1                    0.222 ms        0.222 ms         3153
            +OpenCV_TopHat2D_Constant_Padding/1                     0.257 ms        0.257 ms         2720
            +OpenCV_BottomHat2D_Constant_Padding/1                  0.257 ms        0.257 ms         2721
            +OpenCV_MorphGrad2D_Constant_Padding/1                  0.250 ms        0.250 ms         2805
            +OpenCV_Dilate2D_Constant_Padding/1                     0.137 ms        0.137 ms         5121
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html new file mode 100644 index 00000000..978110d2 --- /dev/null +++ b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -0,0 +1,95 @@ + + + +

            imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:05:43 UTC

            +

            SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            + + + + + + + + + + + + + + + + + + + + + + +
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.874.87144
            MLIR_Conv2D/17.197.1897
            Buddy_Conv2D/10.5260.5261,322
            Buddy_Corr2D_Constant_Padding/10.7930.793885
            OpenCV_Filter2D_Constant_Padding/11.251.25560
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,865
            Buddy_Resize2D_Bilinear_Interpolation/10.260.262,697
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006660.00666105,155
            OpenCV_Resize2D_Bilinear_Interpolation/10.0140.01449,883
            Buddy_Erosion2D_Constant_Padding/10.2140.2143,259
            Buddy_Dilation2D_Constant_Padding/10.2130.2133,259
            Buddy_Opening2D_Constant_Padding/10.3150.3152,258
            Buddy_Closing2D_Constant_Padding/10.3120.3122,255
            Buddy_TopHat2D_Constant_Padding/10.7830.783854
            Buddy_BottomHat2D_Constant_Padding/10.7850.785821
            OpenCV_Erode2D_Constant_Padding/10.1370.1375,119
            OpenCV_Opening2D_Constant_Padding/10.2210.2213,167
            OpenCV_Closing2D_Constant_Padding/10.2230.2233,139
            OpenCV_TopHat2D_Constant_Padding/10.2580.2582,712
            OpenCV_BottomHat2D_Constant_Padding/10.2560.2562,733
            OpenCV_MorphGrad2D_Constant_Padding/10.2490.2492,804
            OpenCV_Dilate2D_Constant_Padding/10.1370.1375,118
            +
            Console output +
            2025-06-01T10:00:56+00:00
            +Running ./bin/image-processing-benchmark
            +Run on (24 X 5100 MHz CPU s)
            +CPU Caches:
            +  L1 Data 48 KiB (x12)
            +  L1 Instruction 32 KiB (x12)
            +  L2 Unified 1280 KiB (x12)
            +  L3 Unified 30720 KiB (x1)
            +Load Average: 1.23, 1.63, 2.49
            +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            +--------------------------------------------------------------------------------------------
            +Benchmark                                                  Time             CPU   Iterations
            +--------------------------------------------------------------------------------------------
            +Eigen_Convolve2D/1                                      4.87 ms         4.87 ms          144
            +MLIR_Conv2D/1                                           7.19 ms         7.18 ms           97
            +Buddy_Conv2D/1                                         0.526 ms        0.526 ms         1322
            +Buddy_Corr2D_Constant_Padding/1                        0.793 ms        0.793 ms          885
            +OpenCV_Filter2D_Constant_Padding/1                      1.25 ms         1.25 ms          560
            +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4865
            +Buddy_Resize2D_Bilinear_Interpolation/1                0.260 ms        0.260 ms         2697
            +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       105155
            +OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49883
            +Buddy_Erosion2D_Constant_Padding/1                     0.214 ms        0.214 ms         3259
            +Buddy_Dilation2D_Constant_Padding/1                    0.213 ms        0.213 ms         3259
            +Buddy_Opening2D_Constant_Padding/1                     0.315 ms        0.315 ms         2258
            +Buddy_Closing2D_Constant_Padding/1                     0.312 ms        0.312 ms         2255
            +Buddy_TopHat2D_Constant_Padding/1                      0.783 ms        0.783 ms          854
            +Buddy_BottomHat2D_Constant_Padding/1                   0.785 ms        0.785 ms          821
            +OpenCV_Erode2D_Constant_Padding/1                      0.137 ms        0.137 ms         5119
            +OpenCV_Opening2D_Constant_Padding/1                    0.221 ms        0.221 ms         3167
            +OpenCV_Closing2D_Constant_Padding/1                    0.223 ms        0.223 ms         3139
            +OpenCV_TopHat2D_Constant_Padding/1                     0.258 ms        0.258 ms         2712
            +OpenCV_BottomHat2D_Constant_Padding/1                  0.256 ms        0.256 ms         2733
            +OpenCV_MorphGrad2D_Constant_Padding/1                  0.249 ms        0.249 ms         2804
            +OpenCV_Dilate2D_Constant_Padding/1                     0.137 ms        0.137 ms         5118
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html new file mode 100644 index 00000000..77463382 --- /dev/null +++ b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -0,0 +1,95 @@ + + + +

            imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:05:43 UTC

            +

            SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            + + + + + + + + + + + + + + + + + + + + + + +
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.511.561
            MLIR_Conv2D/128.928.924
            Buddy_Conv2D/13.033.03231
            Buddy_Corr2D_Constant_Padding/12.312.31303
            OpenCV_Filter2D_Constant_Padding/14.114.11171
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1420.1424,848
            Buddy_Resize2D_Bilinear_Interpolation/10.2610.2612,686
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006660.00666105,211
            OpenCV_Resize2D_Bilinear_Interpolation/10.0140.01449,990
            Buddy_Erosion2D_Constant_Padding/10.2140.2143,269
            Buddy_Dilation2D_Constant_Padding/10.2150.2153,239
            Buddy_Opening2D_Constant_Padding/10.3140.3142,276
            Buddy_Closing2D_Constant_Padding/10.3080.3082,257
            Buddy_TopHat2D_Constant_Padding/10.8040.804840
            Buddy_BottomHat2D_Constant_Padding/10.7830.783836
            OpenCV_Erode2D_Constant_Padding/10.1360.1365,131
            OpenCV_Opening2D_Constant_Padding/10.2240.2243,120
            OpenCV_Closing2D_Constant_Padding/10.2270.2273,078
            OpenCV_TopHat2D_Constant_Padding/10.260.262,688
            OpenCV_BottomHat2D_Constant_Padding/10.2590.2592,702
            OpenCV_MorphGrad2D_Constant_Padding/10.2530.2532,764
            OpenCV_Dilate2D_Constant_Padding/10.1390.1395,022
            +
            Console output +
            2025-06-01T10:01:20+00:00
            +Running ./bin/image-processing-benchmark
            +Run on (24 X 5100 MHz CPU s)
            +CPU Caches:
            +  L1 Data 48 KiB (x12)
            +  L1 Instruction 32 KiB (x12)
            +  L2 Unified 1280 KiB (x12)
            +  L3 Unified 30720 KiB (x1)
            +Load Average: 1.15, 1.58, 2.45
            +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            +--------------------------------------------------------------------------------------------
            +Benchmark                                                  Time             CPU   Iterations
            +--------------------------------------------------------------------------------------------
            +Eigen_Convolve2D/1                                      11.5 ms         11.5 ms           61
            +MLIR_Conv2D/1                                           28.9 ms         28.9 ms           24
            +Buddy_Conv2D/1                                          3.03 ms         3.03 ms          231
            +Buddy_Corr2D_Constant_Padding/1                         2.31 ms         2.31 ms          303
            +OpenCV_Filter2D_Constant_Padding/1                      4.11 ms         4.11 ms          171
            +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.142 ms        0.142 ms         4848
            +Buddy_Resize2D_Bilinear_Interpolation/1                0.261 ms        0.261 ms         2686
            +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       105211
            +OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49990
            +Buddy_Erosion2D_Constant_Padding/1                     0.214 ms        0.214 ms         3269
            +Buddy_Dilation2D_Constant_Padding/1                    0.215 ms        0.215 ms         3239
            +Buddy_Opening2D_Constant_Padding/1                     0.314 ms        0.314 ms         2276
            +Buddy_Closing2D_Constant_Padding/1                     0.308 ms        0.308 ms         2257
            +Buddy_TopHat2D_Constant_Padding/1                      0.804 ms        0.804 ms          840
            +Buddy_BottomHat2D_Constant_Padding/1                   0.783 ms        0.783 ms          836
            +OpenCV_Erode2D_Constant_Padding/1                      0.136 ms        0.136 ms         5131
            +OpenCV_Opening2D_Constant_Padding/1                    0.224 ms        0.224 ms         3120
            +OpenCV_Closing2D_Constant_Padding/1                    0.227 ms        0.227 ms         3078
            +OpenCV_TopHat2D_Constant_Padding/1                     0.260 ms        0.260 ms         2688
            +OpenCV_BottomHat2D_Constant_Padding/1                  0.259 ms        0.259 ms         2702
            +OpenCV_MorphGrad2D_Constant_Padding/1                  0.253 ms        0.253 ms         2764
            +OpenCV_Dilate2D_Constant_Padding/1                     0.139 ms        0.139 ms         5022
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html new file mode 100644 index 00000000..1e97ad5b --- /dev/null +++ b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -0,0 +1,95 @@ + + + +

            imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:05:43 UTC

            +

            SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            + + + + + + + + + + + + + + + + + + + + + + +
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.611.661
            MLIR_Conv2D/128.728.724
            Buddy_Conv2D/13.033.03231
            Buddy_Corr2D_Constant_Padding/12.312.31302
            OpenCV_Filter2D_Constant_Padding/14.14.1170
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1420.1424,835
            Buddy_Resize2D_Bilinear_Interpolation/10.260.262,693
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006680.00668104,962
            OpenCV_Resize2D_Bilinear_Interpolation/10.0140.01449,843
            Buddy_Erosion2D_Constant_Padding/10.2150.2153,259
            Buddy_Dilation2D_Constant_Padding/10.2140.2143,254
            Buddy_Opening2D_Constant_Padding/10.3070.3072,262
            Buddy_Closing2D_Constant_Padding/10.3190.3192,241
            Buddy_TopHat2D_Constant_Padding/10.7840.784851
            Buddy_BottomHat2D_Constant_Padding/10.7770.777840
            OpenCV_Erode2D_Constant_Padding/10.1350.1355,192
            OpenCV_Opening2D_Constant_Padding/10.2310.2313,030
            OpenCV_Closing2D_Constant_Padding/10.2290.2293,053
            OpenCV_TopHat2D_Constant_Padding/10.2680.2682,609
            OpenCV_BottomHat2D_Constant_Padding/10.2670.2672,624
            OpenCV_MorphGrad2D_Constant_Padding/10.2580.2582,714
            OpenCV_Dilate2D_Constant_Padding/10.1370.1375,119
            +
            Console output +
            2025-06-01T10:01:44+00:00
            +Running ./bin/image-processing-benchmark
            +Run on (24 X 5100 MHz CPU s)
            +CPU Caches:
            +  L1 Data 48 KiB (x12)
            +  L1 Instruction 32 KiB (x12)
            +  L2 Unified 1280 KiB (x12)
            +  L3 Unified 30720 KiB (x1)
            +Load Average: 1.10, 1.53, 2.41
            +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            +--------------------------------------------------------------------------------------------
            +Benchmark                                                  Time             CPU   Iterations
            +--------------------------------------------------------------------------------------------
            +Eigen_Convolve2D/1                                      11.6 ms         11.6 ms           61
            +MLIR_Conv2D/1                                           28.7 ms         28.7 ms           24
            +Buddy_Conv2D/1                                          3.03 ms         3.03 ms          231
            +Buddy_Corr2D_Constant_Padding/1                         2.31 ms         2.31 ms          302
            +OpenCV_Filter2D_Constant_Padding/1                      4.10 ms         4.10 ms          170
            +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.142 ms        0.142 ms         4835
            +Buddy_Resize2D_Bilinear_Interpolation/1                0.260 ms        0.260 ms         2693
            +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       104962
            +OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49843
            +Buddy_Erosion2D_Constant_Padding/1                     0.215 ms        0.215 ms         3259
            +Buddy_Dilation2D_Constant_Padding/1                    0.214 ms        0.214 ms         3254
            +Buddy_Opening2D_Constant_Padding/1                     0.307 ms        0.307 ms         2262
            +Buddy_Closing2D_Constant_Padding/1                     0.319 ms        0.319 ms         2241
            +Buddy_TopHat2D_Constant_Padding/1                      0.784 ms        0.784 ms          851
            +Buddy_BottomHat2D_Constant_Padding/1                   0.777 ms        0.777 ms          840
            +OpenCV_Erode2D_Constant_Padding/1                      0.135 ms        0.135 ms         5192
            +OpenCV_Opening2D_Constant_Padding/1                    0.231 ms        0.231 ms         3030
            +OpenCV_Closing2D_Constant_Padding/1                    0.229 ms        0.229 ms         3053
            +OpenCV_TopHat2D_Constant_Padding/1                     0.268 ms        0.268 ms         2609
            +OpenCV_BottomHat2D_Constant_Padding/1                  0.267 ms        0.267 ms         2624
            +OpenCV_MorphGrad2D_Constant_Padding/1                  0.258 ms        0.258 ms         2714
            +OpenCV_Dilate2D_Constant_Padding/1                     0.137 ms        0.137 ms         5119
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html new file mode 100644 index 00000000..ef1965aa --- /dev/null +++ b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -0,0 +1,95 @@ + + + +

            imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:05:43 UTC

            +

            SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            + + + + + + + + + + + + + + + + + + + + + + +
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/121.621.631
            MLIR_Conv2D/166.766.711
            Buddy_Conv2D/16.126.12114
            Buddy_Corr2D_Constant_Padding/14.654.65151
            OpenCV_Filter2D_Constant_Padding/18.68.681
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,847
            Buddy_Resize2D_Bilinear_Interpolation/10.2610.2612,686
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006660.00666105,138
            OpenCV_Resize2D_Bilinear_Interpolation/10.0140.01449,889
            Buddy_Erosion2D_Constant_Padding/10.2130.2133,257
            Buddy_Dilation2D_Constant_Padding/10.2130.2133,242
            Buddy_Opening2D_Constant_Padding/10.3130.3132,237
            Buddy_Closing2D_Constant_Padding/10.3180.3182,232
            Buddy_TopHat2D_Constant_Padding/10.7750.775853
            Buddy_BottomHat2D_Constant_Padding/10.7880.788846
            OpenCV_Erode2D_Constant_Padding/10.1380.1385,075
            OpenCV_Opening2D_Constant_Padding/10.2320.2323,015
            OpenCV_Closing2D_Constant_Padding/10.2250.2253,114
            OpenCV_TopHat2D_Constant_Padding/10.2640.2642,647
            OpenCV_BottomHat2D_Constant_Padding/10.2620.2622,672
            OpenCV_MorphGrad2D_Constant_Padding/10.2550.2552,749
            OpenCV_Dilate2D_Constant_Padding/10.1370.1375,111
            +
            Console output +
            2025-06-01T10:02:08+00:00
            +Running ./bin/image-processing-benchmark
            +Run on (24 X 5100 MHz CPU s)
            +CPU Caches:
            +  L1 Data 48 KiB (x12)
            +  L1 Instruction 32 KiB (x12)
            +  L2 Unified 1280 KiB (x12)
            +  L3 Unified 30720 KiB (x1)
            +Load Average: 1.06, 1.49, 2.37
            +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            +--------------------------------------------------------------------------------------------
            +Benchmark                                                  Time             CPU   Iterations
            +--------------------------------------------------------------------------------------------
            +Eigen_Convolve2D/1                                      21.6 ms         21.6 ms           31
            +MLIR_Conv2D/1                                           66.7 ms         66.7 ms           11
            +Buddy_Conv2D/1                                          6.12 ms         6.12 ms          114
            +Buddy_Corr2D_Constant_Padding/1                         4.65 ms         4.65 ms          151
            +OpenCV_Filter2D_Constant_Padding/1                      8.60 ms         8.60 ms           81
            +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4847
            +Buddy_Resize2D_Bilinear_Interpolation/1                0.261 ms        0.261 ms         2686
            +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       105138
            +OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49889
            +Buddy_Erosion2D_Constant_Padding/1                     0.213 ms        0.213 ms         3257
            +Buddy_Dilation2D_Constant_Padding/1                    0.213 ms        0.213 ms         3242
            +Buddy_Opening2D_Constant_Padding/1                     0.313 ms        0.313 ms         2237
            +Buddy_Closing2D_Constant_Padding/1                     0.318 ms        0.318 ms         2232
            +Buddy_TopHat2D_Constant_Padding/1                      0.775 ms        0.775 ms          853
            +Buddy_BottomHat2D_Constant_Padding/1                   0.788 ms        0.788 ms          846
            +OpenCV_Erode2D_Constant_Padding/1                      0.138 ms        0.138 ms         5075
            +OpenCV_Opening2D_Constant_Padding/1                    0.232 ms        0.232 ms         3015
            +OpenCV_Closing2D_Constant_Padding/1                    0.225 ms        0.225 ms         3114
            +OpenCV_TopHat2D_Constant_Padding/1                     0.264 ms        0.264 ms         2647
            +OpenCV_BottomHat2D_Constant_Padding/1                  0.262 ms        0.262 ms         2672
            +OpenCV_MorphGrad2D_Constant_Padding/1                  0.255 ms        0.255 ms         2749
            +OpenCV_Dilate2D_Constant_Padding/1                     0.137 ms        0.137 ms         5111
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html new file mode 100644 index 00000000..0f80d1aa --- /dev/null +++ b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -0,0 +1,95 @@ + + + +

            imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:05:43 UTC

            +

            SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            + + + + + + + + + + + + + + + + + + + + + + +
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/121.521.532
            MLIR_Conv2D/166.666.611
            Buddy_Conv2D/16.136.13114
            Buddy_Corr2D_Constant_Padding/14.654.65151
            OpenCV_Filter2D_Constant_Padding/18.68.681
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,857
            Buddy_Resize2D_Bilinear_Interpolation/10.260.262,693
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006650.00665105,362
            OpenCV_Resize2D_Bilinear_Interpolation/10.0140.01449,959
            Buddy_Erosion2D_Constant_Padding/10.2140.2143,220
            Buddy_Dilation2D_Constant_Padding/10.2150.2153,263
            Buddy_Opening2D_Constant_Padding/10.310.312,246
            Buddy_Closing2D_Constant_Padding/10.310.312,217
            Buddy_TopHat2D_Constant_Padding/10.7780.778828
            Buddy_BottomHat2D_Constant_Padding/10.7930.793833
            OpenCV_Erode2D_Constant_Padding/10.1360.1365,129
            OpenCV_Opening2D_Constant_Padding/10.2260.2263,091
            OpenCV_Closing2D_Constant_Padding/10.2260.2263,096
            OpenCV_TopHat2D_Constant_Padding/10.2610.2612,677
            OpenCV_BottomHat2D_Constant_Padding/10.2610.2612,684
            OpenCV_MorphGrad2D_Constant_Padding/10.2530.2532,763
            OpenCV_Dilate2D_Constant_Padding/10.1360.1365,132
            +
            Console output +
            2025-06-01T10:02:32+00:00
            +Running ./bin/image-processing-benchmark
            +Run on (24 X 5100 MHz CPU s)
            +CPU Caches:
            +  L1 Data 48 KiB (x12)
            +  L1 Instruction 32 KiB (x12)
            +  L2 Unified 1280 KiB (x12)
            +  L3 Unified 30720 KiB (x1)
            +Load Average: 1.04, 1.45, 2.33
            +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            +--------------------------------------------------------------------------------------------
            +Benchmark                                                  Time             CPU   Iterations
            +--------------------------------------------------------------------------------------------
            +Eigen_Convolve2D/1                                      21.5 ms         21.5 ms           32
            +MLIR_Conv2D/1                                           66.6 ms         66.6 ms           11
            +Buddy_Conv2D/1                                          6.13 ms         6.13 ms          114
            +Buddy_Corr2D_Constant_Padding/1                         4.65 ms         4.65 ms          151
            +OpenCV_Filter2D_Constant_Padding/1                      8.60 ms         8.60 ms           81
            +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4857
            +Buddy_Resize2D_Bilinear_Interpolation/1                0.260 ms        0.260 ms         2693
            +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       105362
            +OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49959
            +Buddy_Erosion2D_Constant_Padding/1                     0.214 ms        0.214 ms         3220
            +Buddy_Dilation2D_Constant_Padding/1                    0.215 ms        0.215 ms         3263
            +Buddy_Opening2D_Constant_Padding/1                     0.310 ms        0.310 ms         2246
            +Buddy_Closing2D_Constant_Padding/1                     0.310 ms        0.310 ms         2217
            +Buddy_TopHat2D_Constant_Padding/1                      0.778 ms        0.778 ms          828
            +Buddy_BottomHat2D_Constant_Padding/1                   0.793 ms        0.793 ms          833
            +OpenCV_Erode2D_Constant_Padding/1                      0.136 ms        0.136 ms         5129
            +OpenCV_Opening2D_Constant_Padding/1                    0.226 ms        0.226 ms         3091
            +OpenCV_Closing2D_Constant_Padding/1                    0.226 ms        0.226 ms         3096
            +OpenCV_TopHat2D_Constant_Padding/1                     0.261 ms        0.261 ms         2677
            +OpenCV_BottomHat2D_Constant_Padding/1                  0.261 ms        0.261 ms         2684
            +OpenCV_MorphGrad2D_Constant_Padding/1                  0.253 ms        0.253 ms         2763
            +OpenCV_Dilate2D_Constant_Padding/1                     0.136 ms        0.136 ms         5132
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html new file mode 100644 index 00000000..c8db88a9 --- /dev/null +++ b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -0,0 +1,95 @@ + + + +

            imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:05:43 UTC

            +

            SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            + + + + + + + + + + + + + + + + + + + + + + +
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/141.541.517
            MLIR_Conv2D/11441445
            Buddy_Conv2D/110.510.567
            Buddy_Corr2D_Constant_Padding/17.957.9590
            OpenCV_Filter2D_Constant_Padding/15.895.89120
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,856
            Buddy_Resize2D_Bilinear_Interpolation/10.2610.2612,692
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006660.00666105,165
            OpenCV_Resize2D_Bilinear_Interpolation/10.01420.014249,405
            Buddy_Erosion2D_Constant_Padding/10.2140.2143,218
            Buddy_Dilation2D_Constant_Padding/10.2150.2153,226
            Buddy_Opening2D_Constant_Padding/10.3120.3122,175
            Buddy_Closing2D_Constant_Padding/10.3120.3122,264
            Buddy_TopHat2D_Constant_Padding/10.8210.821843
            Buddy_BottomHat2D_Constant_Padding/10.8180.818844
            OpenCV_Erode2D_Constant_Padding/10.1360.1365,142
            OpenCV_Opening2D_Constant_Padding/10.2210.2213,174
            OpenCV_Closing2D_Constant_Padding/10.2210.2213,164
            OpenCV_TopHat2D_Constant_Padding/10.2560.2562,735
            OpenCV_BottomHat2D_Constant_Padding/10.2580.2582,710
            OpenCV_MorphGrad2D_Constant_Padding/10.2510.2512,787
            OpenCV_Dilate2D_Constant_Padding/10.1370.1375,117
            +
            Console output +
            2025-06-01T10:02:56+00:00
            +Running ./bin/image-processing-benchmark
            +Run on (24 X 5100 MHz CPU s)
            +CPU Caches:
            +  L1 Data 48 KiB (x12)
            +  L1 Instruction 32 KiB (x12)
            +  L2 Unified 1280 KiB (x12)
            +  L3 Unified 30720 KiB (x1)
            +Load Average: 1.03, 1.42, 2.30
            +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            +--------------------------------------------------------------------------------------------
            +Benchmark                                                  Time             CPU   Iterations
            +--------------------------------------------------------------------------------------------
            +Eigen_Convolve2D/1                                      41.5 ms         41.5 ms           17
            +MLIR_Conv2D/1                                            144 ms          144 ms            5
            +Buddy_Conv2D/1                                          10.5 ms         10.5 ms           67
            +Buddy_Corr2D_Constant_Padding/1                         7.95 ms         7.95 ms           90
            +OpenCV_Filter2D_Constant_Padding/1                      5.89 ms         5.89 ms          120
            +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4856
            +Buddy_Resize2D_Bilinear_Interpolation/1                0.261 ms        0.261 ms         2692
            +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       105165
            +OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49405
            +Buddy_Erosion2D_Constant_Padding/1                     0.214 ms        0.214 ms         3218
            +Buddy_Dilation2D_Constant_Padding/1                    0.215 ms        0.215 ms         3226
            +Buddy_Opening2D_Constant_Padding/1                     0.312 ms        0.312 ms         2175
            +Buddy_Closing2D_Constant_Padding/1                     0.312 ms        0.312 ms         2264
            +Buddy_TopHat2D_Constant_Padding/1                      0.821 ms        0.821 ms          843
            +Buddy_BottomHat2D_Constant_Padding/1                   0.818 ms        0.818 ms          844
            +OpenCV_Erode2D_Constant_Padding/1                      0.136 ms        0.136 ms         5142
            +OpenCV_Opening2D_Constant_Padding/1                    0.221 ms        0.221 ms         3174
            +OpenCV_Closing2D_Constant_Padding/1                    0.221 ms        0.221 ms         3164
            +OpenCV_TopHat2D_Constant_Padding/1                     0.256 ms        0.256 ms         2735
            +OpenCV_BottomHat2D_Constant_Padding/1                  0.258 ms        0.258 ms         2710
            +OpenCV_MorphGrad2D_Constant_Padding/1                  0.251 ms        0.251 ms         2787
            +OpenCV_Dilate2D_Constant_Padding/1                     0.137 ms        0.137 ms         5117
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html new file mode 100644 index 00000000..9b65cea6 --- /dev/null +++ b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -0,0 +1,95 @@ + + + +

            imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:05:43 UTC

            +

            SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            + + + + + + + + + + + + + + + + + + + + + + +
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/134.234.220
            MLIR_Conv2D/11191196
            Buddy_Conv2D/110.510.567
            Buddy_Corr2D_Constant_Padding/17.897.8990
            OpenCV_Filter2D_Constant_Padding/15.895.89119
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,857
            Buddy_Resize2D_Bilinear_Interpolation/10.260.262,690
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006650.00665105,068
            OpenCV_Resize2D_Bilinear_Interpolation/10.01420.014249,449
            Buddy_Erosion2D_Constant_Padding/10.2140.2143,244
            Buddy_Dilation2D_Constant_Padding/10.2250.2253,243
            Buddy_Opening2D_Constant_Padding/10.3070.3072,260
            Buddy_Closing2D_Constant_Padding/10.3130.3132,223
            Buddy_TopHat2D_Constant_Padding/10.8180.818827
            Buddy_BottomHat2D_Constant_Padding/10.7970.796861
            OpenCV_Erode2D_Constant_Padding/10.1370.1375,101
            OpenCV_Opening2D_Constant_Padding/10.2190.2193,187
            OpenCV_Closing2D_Constant_Padding/10.2220.2223,142
            OpenCV_TopHat2D_Constant_Padding/10.2560.2562,731
            OpenCV_BottomHat2D_Constant_Padding/10.2550.2552,740
            OpenCV_MorphGrad2D_Constant_Padding/10.2490.2492,815
            OpenCV_Dilate2D_Constant_Padding/10.1350.1355,206
            +
            Console output +
            2025-06-01T10:03:20+00:00
            +Running ./bin/image-processing-benchmark
            +Run on (24 X 5100 MHz CPU s)
            +CPU Caches:
            +  L1 Data 48 KiB (x12)
            +  L1 Instruction 32 KiB (x12)
            +  L2 Unified 1280 KiB (x12)
            +  L3 Unified 30720 KiB (x1)
            +Load Average: 1.02, 1.38, 2.27
            +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            +--------------------------------------------------------------------------------------------
            +Benchmark                                                  Time             CPU   Iterations
            +--------------------------------------------------------------------------------------------
            +Eigen_Convolve2D/1                                      34.2 ms         34.2 ms           20
            +MLIR_Conv2D/1                                            119 ms          119 ms            6
            +Buddy_Conv2D/1                                          10.5 ms         10.5 ms           67
            +Buddy_Corr2D_Constant_Padding/1                         7.89 ms         7.89 ms           90
            +OpenCV_Filter2D_Constant_Padding/1                      5.89 ms         5.89 ms          119
            +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4857
            +Buddy_Resize2D_Bilinear_Interpolation/1                0.260 ms        0.260 ms         2690
            +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       105068
            +OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49449
            +Buddy_Erosion2D_Constant_Padding/1                     0.214 ms        0.214 ms         3244
            +Buddy_Dilation2D_Constant_Padding/1                    0.225 ms        0.225 ms         3243
            +Buddy_Opening2D_Constant_Padding/1                     0.307 ms        0.307 ms         2260
            +Buddy_Closing2D_Constant_Padding/1                     0.313 ms        0.313 ms         2223
            +Buddy_TopHat2D_Constant_Padding/1                      0.818 ms        0.818 ms          827
            +Buddy_BottomHat2D_Constant_Padding/1                   0.797 ms        0.796 ms          861
            +OpenCV_Erode2D_Constant_Padding/1                      0.137 ms        0.137 ms         5101
            +OpenCV_Opening2D_Constant_Padding/1                    0.219 ms        0.219 ms         3187
            +OpenCV_Closing2D_Constant_Padding/1                    0.222 ms        0.222 ms         3142
            +OpenCV_TopHat2D_Constant_Padding/1                     0.256 ms        0.256 ms         2731
            +OpenCV_BottomHat2D_Constant_Padding/1                  0.255 ms        0.255 ms         2740
            +OpenCV_MorphGrad2D_Constant_Padding/1                  0.249 ms        0.249 ms         2815
            +OpenCV_Dilate2D_Constant_Padding/1                     0.135 ms        0.135 ms         5206
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +Saved PNG file.
            +
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/index.html b/site/benchmarks/2025-07-27/index.html new file mode 100644 index 00000000..25d339ca --- /dev/null +++ b/site/benchmarks/2025-07-27/index.html @@ -0,0 +1,14 @@ +--- +layout: default +title: Benchmark run +--- + +

            Benchmark results

            + +
              +{% for f in site.static_files %} + {% if f.path contains page.dir and f.name != "index.html" and f.extname == ".html" %} +
            • {{ f.name }}
            • + {% endif %} +{% endfor %} +
            diff --git a/site/benchmarks/2025-07-27/vectorization/vectorization_matrix.html b/site/benchmarks/2025-07-27/vectorization/vectorization_matrix.html new file mode 100644 index 00000000..643b4664 --- /dev/null +++ b/site/benchmarks/2025-07-27/vectorization/vectorization_matrix.html @@ -0,0 +1,40 @@ + + + +

            vectorization/vectorization_matrix.json

            2025-07-27 17:05:43 UTC

            +

            vectorization_matrix.json

            + + +
            NameTime (ns)CPU (ns)Iterations
            MLIR_MatMul/118.818.837,302,822
            MLIR_MatVec/120.520.535,030,976
            +
            Console output +
            2025-06-01T10:11:11+00:00
            +Running ./vectorization-matrix-benchmark
            +Run on (24 X 5100 MHz CPU s)
            +CPU Caches:
            +  L1 Data 48 KiB (x12)
            +  L1 Instruction 32 KiB (x12)
            +  L2 Unified 1280 KiB (x12)
            +  L3 Unified 30720 KiB (x1)
            +Load Average: 1.00, 1.09, 1.76
            +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            +--------------------------------------------------------
            +Benchmark              Time             CPU   Iterations
            +--------------------------------------------------------
            +MLIR_MatMul/1       18.8 ns         18.8 ns     37302822
            +MLIR_MatVec/1       20.5 ns         20.5 ns     35030976
            +--------------------------------------------------------
            +MLIR_MatMul: MLIR MatMul Operation + Nested Loop
            +[ 18 18 18 18 18 18 18 18 18 18 ]
            +--------------------------------------------------------
            +MLIR_MatVec: MLIR MatVec Operation
            +[ 18 18 18 18 18 18 18 18 18 18 ]
            +
            \ No newline at end of file diff --git a/site/benchmarks/latest/index.html b/site/benchmarks/latest/index.html new file mode 100644 index 00000000..78e7713b --- /dev/null +++ b/site/benchmarks/latest/index.html @@ -0,0 +1 @@ + From e052b97d4f2ac41a808a54255a4839d5940f20db Mon Sep 17 00:00:00 2001 From: LIUQyou <1343451020@qq.com> Date: Sun, 27 Jul 2025 17:28:25 +0000 Subject: [PATCH 37/52] test --- .github/workflows/bench.yml | 11 +++++++++-- .../deeplearning/dl-layer-ffn-benchmark.html | 2 +- .../deeplearning/dl-layer-rmsnorm-benchmark.html | 2 +- .../dl-layer-selfattention-benchmark.html | 2 +- .../deeplearning/dl-model-lenet-benchmark.html | 2 +- .../deeplearning/dl-model-mobilenetv3-benchmark.html | 2 +- .../deeplearning/dl-model-resnet18-benchmark.html | 2 +- .../deeplearning/dl-model-tinyllama-benchmark.html | 2 +- .../deeplearning/dl-model-whisper-benchmark.html | 2 +- .../dl-op-linalg-arithaddf-benchmark.html | 2 +- .../dl-op-linalg-arithdivf-benchmark.html | 2 +- .../dl-op-linalg-arithmulf-benchmark.html | 2 +- .../dl-op-linalg-arithnegf-benchmark.html | 2 +- .../dl-op-linalg-arithsubf-benchmark.html | 2 +- .../dl-op-linalg-batch-matmul-benchmark.html | 2 +- .../dl-op-linalg-conv2d-nchw-fchw-benchmark.html | 2 +- .../dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html | 2 +- .../dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html | 2 +- ...p-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html | 2 +- .../deeplearning/dl-op-linalg-mathexp-benchmark.html | 2 +- .../deeplearning/dl-op-linalg-mathfpow-benchmark.html | 2 +- .../dl-op-linalg-mathrsqrt-benchmark.html | 2 +- .../deeplearning/dl-op-linalg-matmul-benchmark.html | 2 +- .../dl-op-linalg-pooling-nhwc-sum-benchmark.html | 2 +- .../dl-op-linalg-reduceaddf-benchmark.html | 2 +- .../dl-op-linalg-reducemaxf-benchmark.html | 2 +- .../dl-op-linalg-softmax-exp-sum-div-benchmark.html | 2 +- .../dl-op-matmul-transpose-b-benchmark.html | 2 +- .../deeplearning/dl-op-tosa-transpose-benchmark.html | 2 +- 
...lign_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...ign_random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...lign_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...ign_random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...lign_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...ign_random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...lign_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...ign_random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...lign_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...ign_random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...lign_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...ign_random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...lign_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...ign_random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...lign_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...ign_random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...lign_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...ign_random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...lign_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...ign_random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...lign_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...ign_random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...lign_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...ign_random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...lign_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...ign_random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...lign_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...ign_random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- .../vectorization/vectorization_matrix.html | 2 +- 58 files changed, 66 insertions(+), 59 deletions(-) diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml index e2f23291..f024ae6d 100755 --- a/.github/workflows/bench.yml +++ b/.github/workflows/bench.yml @@ -57,8 +57,15 
@@ jobs: working-directory: /home/quliu/buddy-complier-workspace/buddy-benchmark run: | rm -rf site - run_root="${{ env.BENCH_DIR }}" - python3 scripts/logs2html.py test_result $run_root + python3 scripts/logs2html.py test_result site + + - name: Detect BENCH_DIR + working-directory: /home/quliu/buddy-complier-workspace/buddy-benchmark/site/benchmarks + run: | + latest=$(ls -1d 20*/ | sort -r | head -n1 | tr -d '/') + echo "BENCH_DIR=$PWD/$latest" >> "$GITHUB_ENV" + echo "BENCH_DATE=$latest" >> "$GITHUB_ENV" + echo "[debug] BENCH_DIR = $PWD/$latest" - name: Update benchmarks/latest redirect working-directory: /home/quliu/buddy-complier-workspace/buddy-benchmark/site/benchmarks diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-layer-ffn-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-layer-ffn-benchmark.html index 4014096d..a517f094 100644 --- a/site/benchmarks/2025-07-27/deeplearning/dl-layer-ffn-benchmark.html +++ b/site/benchmarks/2025-07-27/deeplearning/dl-layer-ffn-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-layer-ffn-benchmark.json

            2025-07-27 17:05:43 UTC

            +

            deeplearning/dl-layer-ffn-benchmark.json

            2025-07-27 17:19:21 UTC

            dl-layer-ffn-benchmark.json

            diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-layer-rmsnorm-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-layer-rmsnorm-benchmark.html index 4902feb5..be257ea8 100644 --- a/site/benchmarks/2025-07-27/deeplearning/dl-layer-rmsnorm-benchmark.html +++ b/site/benchmarks/2025-07-27/deeplearning/dl-layer-rmsnorm-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-layer-rmsnorm-benchmark.json

            2025-07-27 17:05:43 UTC

            +

            deeplearning/dl-layer-rmsnorm-benchmark.json

            2025-07-27 17:19:21 UTC

            dl-layer-rmsnorm-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_LAYER_FFN/Scalar0.06540.065410,762
            diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-layer-selfattention-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-layer-selfattention-benchmark.html index 65b08cd1..122c99d3 100644 --- a/site/benchmarks/2025-07-27/deeplearning/dl-layer-selfattention-benchmark.html +++ b/site/benchmarks/2025-07-27/deeplearning/dl-layer-selfattention-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-layer-selfattention-benchmark.json

            2025-07-27 17:05:43 UTC

            +

            deeplearning/dl-layer-selfattention-benchmark.json

            2025-07-27 17:19:21 UTC

            dl-layer-selfattention-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_LAYER_RMSNORM/Scalar0.001960.00196356,202
            diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-model-lenet-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-model-lenet-benchmark.html index 3a412d45..8fa65511 100644 --- a/site/benchmarks/2025-07-27/deeplearning/dl-model-lenet-benchmark.html +++ b/site/benchmarks/2025-07-27/deeplearning/dl-model-lenet-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-model-lenet-benchmark.json

            2025-07-27 17:05:43 UTC

            +

            deeplearning/dl-model-lenet-benchmark.json

            2025-07-27 17:19:21 UTC

            dl-model-lenet-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_LAYER_ATTENTION/Scalar4.694.69149
            diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-model-mobilenetv3-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-model-mobilenetv3-benchmark.html index 153f70be..fc5b0ed1 100644 --- a/site/benchmarks/2025-07-27/deeplearning/dl-model-mobilenetv3-benchmark.html +++ b/site/benchmarks/2025-07-27/deeplearning/dl-model-mobilenetv3-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-model-mobilenetv3-benchmark.json

            2025-07-27 17:05:43 UTC

            +

            deeplearning/dl-model-mobilenetv3-benchmark.json

            2025-07-27 17:19:21 UTC

            dl-model-mobilenetv3-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_MODEL_LENET/Auto_Vectorization0.1650.1654,304
            diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-model-resnet18-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-model-resnet18-benchmark.html index e3f9ddf5..a452d939 100644 --- a/site/benchmarks/2025-07-27/deeplearning/dl-model-resnet18-benchmark.html +++ b/site/benchmarks/2025-07-27/deeplearning/dl-model-resnet18-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-model-resnet18-benchmark.json

            2025-07-27 17:05:43 UTC

            +

            deeplearning/dl-model-resnet18-benchmark.json

            2025-07-27 17:19:21 UTC

            dl-model-resnet18-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_MobileNet_V3/BM_MobileNet_V3_scalar37.137.119
            diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-model-tinyllama-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-model-tinyllama-benchmark.html index 8c7fa324..48e81f52 100644 --- a/site/benchmarks/2025-07-27/deeplearning/dl-model-tinyllama-benchmark.html +++ b/site/benchmarks/2025-07-27/deeplearning/dl-model-tinyllama-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-model-tinyllama-benchmark.json

            2025-07-27 17:05:43 UTC

            +

            deeplearning/dl-model-tinyllama-benchmark.json

            2025-07-27 17:19:21 UTC

            dl-model-tinyllama-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_MODEL_Resnet18/Auto_Vectorization7317231
            diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-model-whisper-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-model-whisper-benchmark.html index 27cd97e0..43782ebd 100644 --- a/site/benchmarks/2025-07-27/deeplearning/dl-model-whisper-benchmark.html +++ b/site/benchmarks/2025-07-27/deeplearning/dl-model-whisper-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-model-whisper-benchmark.json

            2025-07-27 17:05:43 UTC

            +

            deeplearning/dl-model-whisper-benchmark.json

            2025-07-27 17:19:21 UTC

            dl-model-whisper-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_MODEL_TINYLLAMA/scalar1.39e+051.39e+051
            diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithaddf-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithaddf-benchmark.html index bd191a74..e13c7160 100644 --- a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithaddf-benchmark.html +++ b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithaddf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-arithaddf-benchmark.json

            2025-07-27 17:05:43 UTC

            +

            deeplearning/dl-op-linalg-arithaddf-benchmark.json

            2025-07-27 17:19:21 UTC

            dl-op-linalg-arithaddf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_MODEL_Whisper/Auto_Vectorization8e+048e+041
            diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithdivf-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithdivf-benchmark.html index 47bbe164..c236674f 100644 --- a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithdivf-benchmark.html +++ b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithdivf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-arithdivf-benchmark.json

            2025-07-27 17:05:43 UTC

            +

            deeplearning/dl-op-linalg-arithdivf-benchmark.json

            2025-07-27 17:19:21 UTC

            dl-op-linalg-arithdivf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_ADDF_SCALAR0.02950.029523,451
            diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithmulf-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithmulf-benchmark.html index d36d41d0..28bef57f 100644 --- a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithmulf-benchmark.html +++ b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithmulf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-arithmulf-benchmark.json

            2025-07-27 17:05:43 UTC

            +

            deeplearning/dl-op-linalg-arithmulf-benchmark.json

            2025-07-27 17:19:21 UTC

            dl-op-linalg-arithmulf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_DIVF_SCALAR0.02980.029823,358
            diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithnegf-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithnegf-benchmark.html index 481912bc..036a21c1 100644 --- a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithnegf-benchmark.html +++ b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithnegf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-arithnegf-benchmark.json

            2025-07-27 17:05:43 UTC

            +

            deeplearning/dl-op-linalg-arithnegf-benchmark.json

            2025-07-27 17:19:21 UTC

            dl-op-linalg-arithnegf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_MULF_SCALAR0.02980.029823,441
            diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithsubf-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithsubf-benchmark.html index ef9a2d7e..06fa1b02 100644 --- a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithsubf-benchmark.html +++ b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithsubf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-arithsubf-benchmark.json

            2025-07-27 17:05:43 UTC

            +

            deeplearning/dl-op-linalg-arithsubf-benchmark.json

            2025-07-27 17:19:21 UTC

            dl-op-linalg-arithsubf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_NEGF_SCALAR0.02250.022530,969
            diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-batch-matmul-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-batch-matmul-benchmark.html index 24cb1ff1..1aecb865 100644 --- a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-batch-matmul-benchmark.html +++ b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-batch-matmul-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-batch-matmul-benchmark.json

            2025-07-27 17:05:43 UTC

            +

            deeplearning/dl-op-linalg-batch-matmul-benchmark.json

            2025-07-27 17:19:21 UTC

            dl-op-linalg-batch-matmul-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_SUBF_SCALAR0.02940.029423,509
            diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html index 07ef332e..7de6bddf 100644 --- a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html +++ b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.json

            2025-07-27 17:05:43 UTC

            +

            deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.json

            2025-07-27 17:19:21 UTC

            dl-op-linalg-conv2d-nchw-fchw-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_BATCH_MATMUL/Scalar/iterations:13.54e+033.54e+031
            diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html index 2b39ddd0..8eb98c01 100644 --- a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html +++ b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json

            2025-07-27 17:05:43 UTC

            +

            deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json

            2025-07-27 17:19:21 UTC

            dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_Conv2DNchwFchw_SCALAR2832832
            diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html index e986e4b4..55cdb3d9 100644 --- a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html +++ b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json

            2025-07-27 17:05:43 UTC

            +

            deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json

            2025-07-27 17:19:21 UTC

            dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_CONV_2D_NHWC_FHWC/scalar/iterations:572.372.35
            diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html index 915ff12a..8f81e104 100644 --- a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html +++ b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json

            2025-07-27 17:05:43 UTC

            +

            deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json

            2025-07-27 17:19:21 UTC

            dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_CONV_2D_NHWC_HWCF_SCALAR32.332.322
            diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-mathexp-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-mathexp-benchmark.html index 471b0e0a..679a30ba 100644 --- a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-mathexp-benchmark.html +++ b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-mathexp-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-mathexp-benchmark.json

            2025-07-27 17:05:43 UTC

            +

            deeplearning/dl-op-linalg-mathexp-benchmark.json

            2025-07-27 17:19:21 UTC

            dl-op-linalg-mathexp-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/scalar/iterations:54.254.255
            diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-mathfpow-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-mathfpow-benchmark.html index f84f32bf..691e015d 100644 --- a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-mathfpow-benchmark.html +++ b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-mathfpow-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-mathfpow-benchmark.json

            2025-07-27 17:05:43 UTC

            +

            deeplearning/dl-op-linalg-mathfpow-benchmark.json

            2025-07-27 17:19:21 UTC

            dl-op-linalg-mathfpow-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_EXP_SCALAR0.04560.045615,225
            diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html index ed460f7b..432fc0ca 100644 --- a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html +++ b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-mathrsqrt-benchmark.json

            2025-07-27 17:05:43 UTC

            +

            deeplearning/dl-op-linalg-mathrsqrt-benchmark.json

            2025-07-27 17:19:21 UTC

            dl-op-linalg-mathrsqrt-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_FPOW_SCALAR0.08410.08418,255
            diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-matmul-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-matmul-benchmark.html index edf19084..b8f7dad7 100644 --- a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-matmul-benchmark.html +++ b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-matmul-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-matmul-benchmark.json

            2025-07-27 17:05:43 UTC

            +

            deeplearning/dl-op-linalg-matmul-benchmark.json

            2025-07-27 17:19:21 UTC

            dl-op-linalg-matmul-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_RSQRT_SCALAR0.07280.07289,537
            diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html index 4c682618..5dd1bedd 100644 --- a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html +++ b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.json

            2025-07-27 17:05:43 UTC

            +

            deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.json

            2025-07-27 17:19:21 UTC

            dl-op-linalg-pooling-nhwc-sum-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_MATMUL/scalar_O0/iterations:13.93e+033.93e+031
            diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-reduceaddf-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-reduceaddf-benchmark.html index fc53559a..74fc180b 100644 --- a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-reduceaddf-benchmark.html +++ b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-reduceaddf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-reduceaddf-benchmark.json

            2025-07-27 17:05:43 UTC

            +

            deeplearning/dl-op-linalg-reduceaddf-benchmark.json

            2025-07-27 17:19:21 UTC

            ⚠ FAILED: JSON parse error: Expecting value
            Console output
            2025-07-27T14:27:39+00:00
            diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-reducemaxf-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-reducemaxf-benchmark.html
            index dd5a2659..60e11897 100644
            --- a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-reducemaxf-benchmark.html
            +++ b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-reducemaxf-benchmark.html
            @@ -10,7 +10,7 @@
             .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem}
             
             
            -

            deeplearning/dl-op-linalg-reducemaxf-benchmark.json

            2025-07-27 17:05:43 UTC

            +

            deeplearning/dl-op-linalg-reducemaxf-benchmark.json

            2025-07-27 17:19:21 UTC

            ⚠ FAILED: JSON parse error: Expecting value
            Console output
            2025-07-27T14:27:39+00:00
            diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html
            index 07b0d427..085a3f99 100644
            --- a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html
            +++ b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html
            @@ -10,7 +10,7 @@
             .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem}
             
             
            -

            deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.json

            2025-07-27 17:05:43 UTC

            +

            deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.json

            2025-07-27 17:19:21 UTC

            dl-op-linalg-softmax-exp-sum-div-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_POOLING_NHWC_SUM_SCALAR0.2330.2333,002
            diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-op-matmul-transpose-b-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-op-matmul-transpose-b-benchmark.html index 07dc878a..c86aa460 100644 --- a/site/benchmarks/2025-07-27/deeplearning/dl-op-matmul-transpose-b-benchmark.html +++ b/site/benchmarks/2025-07-27/deeplearning/dl-op-matmul-transpose-b-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-matmul-transpose-b-benchmark.json

            2025-07-27 17:05:43 UTC

            +

            deeplearning/dl-op-matmul-transpose-b-benchmark.json

            2025-07-27 17:19:21 UTC

            dl-op-matmul-transpose-b-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_SOFTMAXEXPSUMDIV_SCALAR0.005660.00566121,646
            diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-op-tosa-transpose-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-op-tosa-transpose-benchmark.html index 79e1e3c3..b12b8692 100644 --- a/site/benchmarks/2025-07-27/deeplearning/dl-op-tosa-transpose-benchmark.html +++ b/site/benchmarks/2025-07-27/deeplearning/dl-op-tosa-transpose-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-tosa-transpose-benchmark.json

            2025-07-27 17:05:43 UTC

            +

            deeplearning/dl-op-tosa-transpose-benchmark.json

            2025-07-27 17:19:21 UTC

            dl-op-tosa-transpose-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_MATMUL_TRANSPOSE_B/scalar_O0/iterations:51.05e+031.05e+035
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 4b1e7a32..55bec310 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:05:43 UTC

            +

            imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:19:21 UTC

            AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_TRANSPOSE_2D/scalar_O0/iterations:526.421.45
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 6b3a47c9..8d3fe97d 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:05:43 UTC

            +

            imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:19:21 UTC

            AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.884.88143
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 5522eb0e..e9b21b02 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:05:43 UTC

            +

            imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:19:21 UTC

            AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.884.88143
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 035ae8cb..171587c3 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:05:43 UTC

            +

            imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:19:21 UTC

            AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.511.561
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index c5576381..781a4cd7 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:05:43 UTC

            +

            imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:19:21 UTC

            AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.611.661
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 7d763bbc..2f65d57f 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:05:43 UTC

            +

            imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:19:21 UTC

            AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.874.87144
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index e370d7f3..5d407ec2 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:05:43 UTC

            +

            imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:19:21 UTC

            AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.94.9144
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index b06ad02c..a6405b18 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:05:43 UTC

            +

            imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:19:21 UTC

            AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.774.77147
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 6a9399ab..0251a413 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:05:43 UTC

            +

            imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:19:21 UTC

            AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.914.91143
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 88cebeb3..ade74ebb 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:05:43 UTC

            +

            imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:19:21 UTC

            AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.611.660
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index b95a586d..15b6442b 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:05:43 UTC

            +

            imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:19:21 UTC

            AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.511.560
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index c1d02dd5..aaf7ff56 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:05:43 UTC

            +

            imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:19:21 UTC

            AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/121.521.531
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index b563ae9d..ceadff67 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:05:43 UTC

            +

            imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:19:21 UTC

            AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/121.621.632
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 3821bcb4..065c470e 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:05:43 UTC

            +

            imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:19:21 UTC

            AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/134.234.221
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 86b9fc22..e3b1a7f7 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:05:43 UTC

            +

            imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:19:21 UTC

            SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/134.334.320
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index a11ce7f4..236b0917 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:05:43 UTC

            +

            imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:19:21 UTC

            SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.744.74148
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 8585342c..0c696e1a 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:05:43 UTC

            +

            imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:19:21 UTC

            SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.884.88144
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 4fb7e607..be03610a 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:05:43 UTC

            +

            imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:19:21 UTC

            SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.511.560
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 19d8035f..075f740a 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:05:43 UTC

            +

            imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:19:21 UTC

            SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.611.661
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 0873c54b..5e449087 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:05:43 UTC

            +

            imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:19:21 UTC

            SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/15.035.03139
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 11997445..0e176b0d 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:05:43 UTC

            +

            imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:19:21 UTC

            SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.894.89143
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 978110d2..fd4d2fc2 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:05:43 UTC

            +

            imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:19:21 UTC

            SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.894.89143
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 77463382..573135ca 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:05:43 UTC

            +

            imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:19:21 UTC

            SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.874.87144
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 1e97ad5b..31ffa9b2 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:05:43 UTC

            +

            imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:19:21 UTC

            SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.511.561
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index ef1965aa..0297449e 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:05:43 UTC

            +

            imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:19:21 UTC

            SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.611.661
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 0f80d1aa..ab6f0195 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:05:43 UTC

            +

            imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:19:21 UTC

            SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/121.621.631
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index c8db88a9..8c8e3c06 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:05:43 UTC

            +

            imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:19:21 UTC

            SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/121.521.532
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 9b65cea6..2b621846 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:05:43 UTC

            +

            imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:19:21 UTC

            SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/141.541.517
            diff --git a/site/benchmarks/2025-07-27/vectorization/vectorization_matrix.html b/site/benchmarks/2025-07-27/vectorization/vectorization_matrix.html index 643b4664..d857886c 100644 --- a/site/benchmarks/2025-07-27/vectorization/vectorization_matrix.html +++ b/site/benchmarks/2025-07-27/vectorization/vectorization_matrix.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            vectorization/vectorization_matrix.json

            2025-07-27 17:05:43 UTC

            +

            vectorization/vectorization_matrix.json

            2025-07-27 17:19:21 UTC

            vectorization_matrix.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/134.234.220
            From 38dfdfce0b4e2ffd853a210588b30b1098ef3ccd Mon Sep 17 00:00:00 2001 From: LIUQyou <1343451020@qq.com> Date: Sun, 27 Jul 2025 17:34:20 +0000 Subject: [PATCH 38/52] test --- .github/workflows/bench.yml | 11 ++--------- .../2025-07-27/benchmarks/2025-07-27/index.html | 14 -------------- .../deeplearning/dl-layer-ffn-benchmark.html | 2 +- .../deeplearning/dl-layer-rmsnorm-benchmark.html | 2 +- .../dl-layer-selfattention-benchmark.html | 2 +- .../deeplearning/dl-model-lenet-benchmark.html | 2 +- .../dl-model-mobilenetv3-benchmark.html | 2 +- .../deeplearning/dl-model-resnet18-benchmark.html | 2 +- .../deeplearning/dl-model-tinyllama-benchmark.html | 2 +- .../deeplearning/dl-model-whisper-benchmark.html | 2 +- .../dl-op-linalg-arithaddf-benchmark.html | 2 +- .../dl-op-linalg-arithdivf-benchmark.html | 2 +- .../dl-op-linalg-arithmulf-benchmark.html | 2 +- .../dl-op-linalg-arithnegf-benchmark.html | 2 +- .../dl-op-linalg-arithsubf-benchmark.html | 2 +- .../dl-op-linalg-batch-matmul-benchmark.html | 2 +- .../dl-op-linalg-conv2d-nchw-fchw-benchmark.html | 2 +- .../dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html | 2 +- .../dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html | 2 +- ...inalg-depthwise-conv-2d-nhwc-hwc-benchmark.html | 2 +- .../dl-op-linalg-mathexp-benchmark.html | 2 +- .../dl-op-linalg-mathfpow-benchmark.html | 2 +- .../dl-op-linalg-mathrsqrt-benchmark.html | 2 +- .../dl-op-linalg-matmul-benchmark.html | 2 +- .../dl-op-linalg-pooling-nhwc-sum-benchmark.html | 2 +- .../dl-op-linalg-reduceaddf-benchmark.html | 2 +- .../dl-op-linalg-reducemaxf-benchmark.html | 2 +- ...dl-op-linalg-softmax-exp-sum-div-benchmark.html | 2 +- .../dl-op-matmul-transpose-b-benchmark.html | 2 +- .../dl-op-tosa-transpose-benchmark.html | 2 +- ...n_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ..._random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...n_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ..._random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- 
...n_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ..._random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...n_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ..._random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...n_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ..._random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...n_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ..._random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...n_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ..._random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...n_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ..._random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...n_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ..._random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...n_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ..._random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...n_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ..._random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...n_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ..._random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...n_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ..._random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...n_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ..._random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- .../vectorization/vectorization_matrix.html | 2 +- 59 files changed, 59 insertions(+), 80 deletions(-) delete mode 100644 site/benchmarks/2025-07-27/benchmarks/2025-07-27/index.html rename site/{benchmarks/2025-07-27 => }/deeplearning/dl-layer-ffn-benchmark.html (98%) rename site/{benchmarks/2025-07-27 => }/deeplearning/dl-layer-rmsnorm-benchmark.html (98%) rename site/{benchmarks/2025-07-27 => }/deeplearning/dl-layer-selfattention-benchmark.html (98%) rename site/{benchmarks/2025-07-27 => }/deeplearning/dl-model-lenet-benchmark.html (98%) rename site/{benchmarks/2025-07-27 => 
}/deeplearning/dl-model-mobilenetv3-benchmark.html (98%) rename site/{benchmarks/2025-07-27 => }/deeplearning/dl-model-resnet18-benchmark.html (98%) rename site/{benchmarks/2025-07-27 => }/deeplearning/dl-model-tinyllama-benchmark.html (98%) rename site/{benchmarks/2025-07-27 => }/deeplearning/dl-model-whisper-benchmark.html (98%) rename site/{benchmarks/2025-07-27 => }/deeplearning/dl-op-linalg-arithaddf-benchmark.html (98%) rename site/{benchmarks/2025-07-27 => }/deeplearning/dl-op-linalg-arithdivf-benchmark.html (98%) rename site/{benchmarks/2025-07-27 => }/deeplearning/dl-op-linalg-arithmulf-benchmark.html (98%) rename site/{benchmarks/2025-07-27 => }/deeplearning/dl-op-linalg-arithnegf-benchmark.html (98%) rename site/{benchmarks/2025-07-27 => }/deeplearning/dl-op-linalg-arithsubf-benchmark.html (98%) rename site/{benchmarks/2025-07-27 => }/deeplearning/dl-op-linalg-batch-matmul-benchmark.html (98%) rename site/{benchmarks/2025-07-27 => }/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html (97%) rename site/{benchmarks/2025-07-27 => }/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html (98%) rename site/{benchmarks/2025-07-27 => }/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html (97%) rename site/{benchmarks/2025-07-27 => }/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html (97%) rename site/{benchmarks/2025-07-27 => }/deeplearning/dl-op-linalg-mathexp-benchmark.html (98%) rename site/{benchmarks/2025-07-27 => }/deeplearning/dl-op-linalg-mathfpow-benchmark.html (98%) rename site/{benchmarks/2025-07-27 => }/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html (98%) rename site/{benchmarks/2025-07-27 => }/deeplearning/dl-op-linalg-matmul-benchmark.html (98%) rename site/{benchmarks/2025-07-27 => }/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html (97%) rename site/{benchmarks/2025-07-27 => }/deeplearning/dl-op-linalg-reduceaddf-benchmark.html (96%) rename site/{benchmarks/2025-07-27 => 
}/deeplearning/dl-op-linalg-reducemaxf-benchmark.html (96%) rename site/{benchmarks/2025-07-27 => }/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html (97%) rename site/{benchmarks/2025-07-27 => }/deeplearning/dl-op-matmul-transpose-b-benchmark.html (98%) rename site/{benchmarks/2025-07-27 => }/deeplearning/dl-op-tosa-transpose-benchmark.html (98%) rename site/{benchmarks/2025-07-27 => }/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html (99%) rename site/{benchmarks/2025-07-27 => }/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html (99%) rename site/{benchmarks/2025-07-27 => }/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html (99%) rename site/{benchmarks/2025-07-27 => }/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html (99%) rename site/{benchmarks/2025-07-27 => }/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html (99%) rename site/{benchmarks/2025-07-27 => }/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html (99%) rename site/{benchmarks/2025-07-27 => }/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html (99%) rename site/{benchmarks/2025-07-27 => }/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html (99%) rename site/{benchmarks/2025-07-27 => }/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html (99%) rename site/{benchmarks/2025-07-27 => }/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html (99%) rename site/{benchmarks/2025-07-27 => }/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html (99%) rename site/{benchmarks/2025-07-27 => 
}/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html (99%) rename site/{benchmarks/2025-07-27 => }/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html (99%) rename site/{benchmarks/2025-07-27 => }/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html (99%) rename site/{benchmarks/2025-07-27 => }/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html (99%) rename site/{benchmarks/2025-07-27 => }/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html (99%) rename site/{benchmarks/2025-07-27 => }/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html (99%) rename site/{benchmarks/2025-07-27 => }/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html (99%) rename site/{benchmarks/2025-07-27 => }/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html (99%) rename site/{benchmarks/2025-07-27 => }/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html (99%) rename site/{benchmarks/2025-07-27 => }/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html (99%) rename site/{benchmarks/2025-07-27 => }/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html (99%) rename site/{benchmarks/2025-07-27 => }/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html (99%) rename site/{benchmarks/2025-07-27 => }/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html (99%) rename site/{benchmarks/2025-07-27 => }/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html (99%) rename site/{benchmarks/2025-07-27 => 
}/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html (99%) rename site/{benchmarks/2025-07-27 => }/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html (99%) rename site/{benchmarks/2025-07-27 => }/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html (99%) rename site/{benchmarks/2025-07-27 => }/vectorization/vectorization_matrix.html (98%) diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml index f024ae6d..a5c70af6 100755 --- a/.github/workflows/bench.yml +++ b/.github/workflows/bench.yml @@ -57,15 +57,8 @@ jobs: working-directory: /home/quliu/buddy-complier-workspace/buddy-benchmark run: | rm -rf site - python3 scripts/logs2html.py test_result site - - - name: Detect BENCH_DIR - working-directory: /home/quliu/buddy-complier-workspace/buddy-benchmark/site/benchmarks - run: | - latest=$(ls -1d 20*/ | sort -r | head -n1 | tr -d '/') - echo "BENCH_DIR=$PWD/$latest" >> "$GITHUB_ENV" - echo "BENCH_DATE=$latest" >> "$GITHUB_ENV" - echo "[debug] BENCH_DIR = $PWD/$latest" + run_root="${{ env.BENCH_DIR }}" + python3 scripts/logs2html.py test_result $run_root/ - name: Update benchmarks/latest redirect working-directory: /home/quliu/buddy-complier-workspace/buddy-benchmark/site/benchmarks diff --git a/site/benchmarks/2025-07-27/benchmarks/2025-07-27/index.html b/site/benchmarks/2025-07-27/benchmarks/2025-07-27/index.html deleted file mode 100644 index 1641d47e..00000000 --- a/site/benchmarks/2025-07-27/benchmarks/2025-07-27/index.html +++ /dev/null @@ -1,14 +0,0 @@ - - -

            Buddy-Benchmark results

              - -
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-layer-ffn-benchmark.html b/site/deeplearning/dl-layer-ffn-benchmark.html similarity index 98% rename from site/benchmarks/2025-07-27/deeplearning/dl-layer-ffn-benchmark.html rename to site/deeplearning/dl-layer-ffn-benchmark.html index a517f094..976d8109 100644 --- a/site/benchmarks/2025-07-27/deeplearning/dl-layer-ffn-benchmark.html +++ b/site/deeplearning/dl-layer-ffn-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-layer-ffn-benchmark.json

            2025-07-27 17:19:21 UTC

            +

            deeplearning/dl-layer-ffn-benchmark.json

            2025-07-27 17:28:46 UTC

            dl-layer-ffn-benchmark.json

            NameTime (ns)CPU (ns)Iterations
            MLIR_MatMul/118.818.837,302,822
            diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-layer-rmsnorm-benchmark.html b/site/deeplearning/dl-layer-rmsnorm-benchmark.html similarity index 98% rename from site/benchmarks/2025-07-27/deeplearning/dl-layer-rmsnorm-benchmark.html rename to site/deeplearning/dl-layer-rmsnorm-benchmark.html index be257ea8..60d0a2b6 100644 --- a/site/benchmarks/2025-07-27/deeplearning/dl-layer-rmsnorm-benchmark.html +++ b/site/deeplearning/dl-layer-rmsnorm-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-layer-rmsnorm-benchmark.json

            2025-07-27 17:19:21 UTC

            +

            deeplearning/dl-layer-rmsnorm-benchmark.json

            2025-07-27 17:28:46 UTC

            dl-layer-rmsnorm-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_LAYER_FFN/Scalar0.06540.065410,762
            diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-layer-selfattention-benchmark.html b/site/deeplearning/dl-layer-selfattention-benchmark.html similarity index 98% rename from site/benchmarks/2025-07-27/deeplearning/dl-layer-selfattention-benchmark.html rename to site/deeplearning/dl-layer-selfattention-benchmark.html index 122c99d3..ed2d6177 100644 --- a/site/benchmarks/2025-07-27/deeplearning/dl-layer-selfattention-benchmark.html +++ b/site/deeplearning/dl-layer-selfattention-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-layer-selfattention-benchmark.json

            2025-07-27 17:19:21 UTC

            +

            deeplearning/dl-layer-selfattention-benchmark.json

            2025-07-27 17:28:46 UTC

            dl-layer-selfattention-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_LAYER_RMSNORM/Scalar0.001960.00196356,202
            diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-model-lenet-benchmark.html b/site/deeplearning/dl-model-lenet-benchmark.html similarity index 98% rename from site/benchmarks/2025-07-27/deeplearning/dl-model-lenet-benchmark.html rename to site/deeplearning/dl-model-lenet-benchmark.html index 8fa65511..d87fe823 100644 --- a/site/benchmarks/2025-07-27/deeplearning/dl-model-lenet-benchmark.html +++ b/site/deeplearning/dl-model-lenet-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-model-lenet-benchmark.json

            2025-07-27 17:19:21 UTC

            +

            deeplearning/dl-model-lenet-benchmark.json

            2025-07-27 17:28:46 UTC

            dl-model-lenet-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_LAYER_ATTENTION/Scalar4.694.69149
            diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-model-mobilenetv3-benchmark.html b/site/deeplearning/dl-model-mobilenetv3-benchmark.html similarity index 98% rename from site/benchmarks/2025-07-27/deeplearning/dl-model-mobilenetv3-benchmark.html rename to site/deeplearning/dl-model-mobilenetv3-benchmark.html index fc5b0ed1..17109311 100644 --- a/site/benchmarks/2025-07-27/deeplearning/dl-model-mobilenetv3-benchmark.html +++ b/site/deeplearning/dl-model-mobilenetv3-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-model-mobilenetv3-benchmark.json

            2025-07-27 17:19:21 UTC

            +

            deeplearning/dl-model-mobilenetv3-benchmark.json

            2025-07-27 17:28:46 UTC

            dl-model-mobilenetv3-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_MODEL_LENET/Auto_Vectorization0.1650.1654,304
            diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-model-resnet18-benchmark.html b/site/deeplearning/dl-model-resnet18-benchmark.html similarity index 98% rename from site/benchmarks/2025-07-27/deeplearning/dl-model-resnet18-benchmark.html rename to site/deeplearning/dl-model-resnet18-benchmark.html index a452d939..e05e0d2c 100644 --- a/site/benchmarks/2025-07-27/deeplearning/dl-model-resnet18-benchmark.html +++ b/site/deeplearning/dl-model-resnet18-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-model-resnet18-benchmark.json

            2025-07-27 17:19:21 UTC

            +

            deeplearning/dl-model-resnet18-benchmark.json

            2025-07-27 17:28:46 UTC

            dl-model-resnet18-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_MobileNet_V3/BM_MobileNet_V3_scalar37.137.119
            diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-model-tinyllama-benchmark.html b/site/deeplearning/dl-model-tinyllama-benchmark.html similarity index 98% rename from site/benchmarks/2025-07-27/deeplearning/dl-model-tinyllama-benchmark.html rename to site/deeplearning/dl-model-tinyllama-benchmark.html index 48e81f52..dce6e69e 100644 --- a/site/benchmarks/2025-07-27/deeplearning/dl-model-tinyllama-benchmark.html +++ b/site/deeplearning/dl-model-tinyllama-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-model-tinyllama-benchmark.json

            2025-07-27 17:19:21 UTC

            +

            deeplearning/dl-model-tinyllama-benchmark.json

            2025-07-27 17:28:46 UTC

            dl-model-tinyllama-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_MODEL_Resnet18/Auto_Vectorization7317231
            diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-model-whisper-benchmark.html b/site/deeplearning/dl-model-whisper-benchmark.html similarity index 98% rename from site/benchmarks/2025-07-27/deeplearning/dl-model-whisper-benchmark.html rename to site/deeplearning/dl-model-whisper-benchmark.html index 43782ebd..3b15b9a3 100644 --- a/site/benchmarks/2025-07-27/deeplearning/dl-model-whisper-benchmark.html +++ b/site/deeplearning/dl-model-whisper-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-model-whisper-benchmark.json

            2025-07-27 17:19:21 UTC

            +

            deeplearning/dl-model-whisper-benchmark.json

            2025-07-27 17:28:46 UTC

            dl-model-whisper-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_MODEL_TINYLLAMA/scalar1.39e+051.39e+051
            diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithaddf-benchmark.html b/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html similarity index 98% rename from site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithaddf-benchmark.html rename to site/deeplearning/dl-op-linalg-arithaddf-benchmark.html index e13c7160..e840b644 100644 --- a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithaddf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-arithaddf-benchmark.json

            2025-07-27 17:19:21 UTC

            +

            deeplearning/dl-op-linalg-arithaddf-benchmark.json

            2025-07-27 17:28:46 UTC

            dl-op-linalg-arithaddf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_MODEL_Whisper/Auto_Vectorization8e+048e+041
            diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithdivf-benchmark.html b/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html similarity index 98% rename from site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithdivf-benchmark.html rename to site/deeplearning/dl-op-linalg-arithdivf-benchmark.html index c236674f..7142ecc3 100644 --- a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithdivf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-arithdivf-benchmark.json

            2025-07-27 17:19:21 UTC

            +

            deeplearning/dl-op-linalg-arithdivf-benchmark.json

            2025-07-27 17:28:46 UTC

            dl-op-linalg-arithdivf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_ADDF_SCALAR0.02950.029523,451
            diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithmulf-benchmark.html b/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html similarity index 98% rename from site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithmulf-benchmark.html rename to site/deeplearning/dl-op-linalg-arithmulf-benchmark.html index 28bef57f..3cc037cb 100644 --- a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithmulf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-arithmulf-benchmark.json

            2025-07-27 17:19:21 UTC

            +

            deeplearning/dl-op-linalg-arithmulf-benchmark.json

            2025-07-27 17:28:46 UTC

            dl-op-linalg-arithmulf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_DIVF_SCALAR0.02980.029823,358
            diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithnegf-benchmark.html b/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html similarity index 98% rename from site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithnegf-benchmark.html rename to site/deeplearning/dl-op-linalg-arithnegf-benchmark.html index 036a21c1..fd2b7d08 100644 --- a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithnegf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-arithnegf-benchmark.json

            2025-07-27 17:19:21 UTC

            +

            deeplearning/dl-op-linalg-arithnegf-benchmark.json

            2025-07-27 17:28:46 UTC

            dl-op-linalg-arithnegf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_MULF_SCALAR0.02980.029823,441
            diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithsubf-benchmark.html b/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html similarity index 98% rename from site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithsubf-benchmark.html rename to site/deeplearning/dl-op-linalg-arithsubf-benchmark.html index 06fa1b02..fa32b3ab 100644 --- a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithsubf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-arithsubf-benchmark.json

            2025-07-27 17:19:21 UTC

            +

            deeplearning/dl-op-linalg-arithsubf-benchmark.json

            2025-07-27 17:28:46 UTC

            dl-op-linalg-arithsubf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_NEGF_SCALAR0.02250.022530,969
            diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-batch-matmul-benchmark.html b/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html similarity index 98% rename from site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-batch-matmul-benchmark.html rename to site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html index 1aecb865..75031c07 100644 --- a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-batch-matmul-benchmark.html +++ b/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-batch-matmul-benchmark.json

            2025-07-27 17:19:21 UTC

            +

            deeplearning/dl-op-linalg-batch-matmul-benchmark.json

            2025-07-27 17:28:46 UTC

            dl-op-linalg-batch-matmul-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_SUBF_SCALAR0.02940.029423,509
            diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html b/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html similarity index 97% rename from site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html rename to site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html index 7de6bddf..e27c54e6 100644 --- a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html +++ b/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.json

            2025-07-27 17:19:21 UTC

            +

            deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.json

            2025-07-27 17:28:46 UTC

            dl-op-linalg-conv2d-nchw-fchw-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_BATCH_MATMUL/Scalar/iterations:13.54e+033.54e+031
            diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html b/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html similarity index 98% rename from site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html rename to site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html index 8eb98c01..31c90b55 100644 --- a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html +++ b/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json

            2025-07-27 17:19:21 UTC

            +

            deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json

            2025-07-27 17:28:46 UTC

            dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_Conv2DNchwFchw_SCALAR2832832
            diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html b/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html similarity index 97% rename from site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html rename to site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html index 55cdb3d9..b31b4bf4 100644 --- a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json

            2025-07-27 17:19:21 UTC

            +

            deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json

            2025-07-27 17:28:46 UTC

            dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_CONV_2D_NHWC_FHWC/scalar/iterations:572.372.35
            diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html b/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html similarity index 97% rename from site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html rename to site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html index 8f81e104..9c8b3b57 100644 --- a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html +++ b/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json

            2025-07-27 17:19:21 UTC

            +

            deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json

            2025-07-27 17:28:46 UTC

            dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_CONV_2D_NHWC_HWCF_SCALAR32.332.322
            diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-mathexp-benchmark.html b/site/deeplearning/dl-op-linalg-mathexp-benchmark.html similarity index 98% rename from site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-mathexp-benchmark.html rename to site/deeplearning/dl-op-linalg-mathexp-benchmark.html index 679a30ba..c65938cf 100644 --- a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-mathexp-benchmark.html +++ b/site/deeplearning/dl-op-linalg-mathexp-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-mathexp-benchmark.json

            2025-07-27 17:19:21 UTC

            +

            deeplearning/dl-op-linalg-mathexp-benchmark.json

            2025-07-27 17:28:46 UTC

            dl-op-linalg-mathexp-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/scalar/iterations:54.254.255
            diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-mathfpow-benchmark.html b/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html similarity index 98% rename from site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-mathfpow-benchmark.html rename to site/deeplearning/dl-op-linalg-mathfpow-benchmark.html index 691e015d..77e37dbd 100644 --- a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-mathfpow-benchmark.html +++ b/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-mathfpow-benchmark.json

            2025-07-27 17:19:21 UTC

            +

            deeplearning/dl-op-linalg-mathfpow-benchmark.json

            2025-07-27 17:28:46 UTC

            dl-op-linalg-mathfpow-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_EXP_SCALAR0.04560.045615,225
            diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html b/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html similarity index 98% rename from site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html rename to site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html index 432fc0ca..bc08f294 100644 --- a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html +++ b/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-mathrsqrt-benchmark.json

            2025-07-27 17:19:21 UTC

            +

            deeplearning/dl-op-linalg-mathrsqrt-benchmark.json

            2025-07-27 17:28:46 UTC

            dl-op-linalg-mathrsqrt-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_FPOW_SCALAR0.08410.08418,255
            diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-matmul-benchmark.html b/site/deeplearning/dl-op-linalg-matmul-benchmark.html similarity index 98% rename from site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-matmul-benchmark.html rename to site/deeplearning/dl-op-linalg-matmul-benchmark.html index b8f7dad7..00f1cc85 100644 --- a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-matmul-benchmark.html +++ b/site/deeplearning/dl-op-linalg-matmul-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-matmul-benchmark.json

            2025-07-27 17:19:21 UTC

            +

            deeplearning/dl-op-linalg-matmul-benchmark.json

            2025-07-27 17:28:46 UTC

            dl-op-linalg-matmul-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_RSQRT_SCALAR0.07280.07289,537
            diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html b/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html similarity index 97% rename from site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html rename to site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html index 5dd1bedd..ac5b6b8d 100644 --- a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html +++ b/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.json

            2025-07-27 17:19:21 UTC

            +

            deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.json

            2025-07-27 17:28:46 UTC

            dl-op-linalg-pooling-nhwc-sum-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_MATMUL/scalar_O0/iterations:13.93e+033.93e+031
            diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-reduceaddf-benchmark.html b/site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html similarity index 96% rename from site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-reduceaddf-benchmark.html rename to site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html index 74fc180b..b5a83174 100644 --- a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-reduceaddf-benchmark.html +++ b/site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-reduceaddf-benchmark.json

            2025-07-27 17:19:21 UTC

            +

            deeplearning/dl-op-linalg-reduceaddf-benchmark.json

            2025-07-27 17:28:46 UTC

            ⚠ FAILED: JSON parse error: Expecting value
            Console output
            2025-07-27T14:27:39+00:00
            diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-reducemaxf-benchmark.html b/site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html
            similarity index 96%
            rename from site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-reducemaxf-benchmark.html
            rename to site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html
            index 60e11897..4c9779f9 100644
            --- a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-reducemaxf-benchmark.html
            +++ b/site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html
            @@ -10,7 +10,7 @@
             .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem}
             
             
            -

            deeplearning/dl-op-linalg-reducemaxf-benchmark.json

            2025-07-27 17:19:21 UTC

            +

            deeplearning/dl-op-linalg-reducemaxf-benchmark.json

            2025-07-27 17:28:46 UTC

            ⚠ FAILED: JSON parse error: Expecting value
            Console output
            2025-07-27T14:27:39+00:00
            diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html b/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html
            similarity index 97%
            rename from site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html
            rename to site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html
            index 085a3f99..76dc1ed3 100644
            --- a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html
            +++ b/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html
            @@ -10,7 +10,7 @@
             .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem}
             
             
            -

            deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.json

            2025-07-27 17:19:21 UTC

            +

            deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.json

            2025-07-27 17:28:46 UTC

            dl-op-linalg-softmax-exp-sum-div-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_POOLING_NHWC_SUM_SCALAR0.2330.2333,002
            diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-op-matmul-transpose-b-benchmark.html b/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html similarity index 98% rename from site/benchmarks/2025-07-27/deeplearning/dl-op-matmul-transpose-b-benchmark.html rename to site/deeplearning/dl-op-matmul-transpose-b-benchmark.html index c86aa460..4845e3b2 100644 --- a/site/benchmarks/2025-07-27/deeplearning/dl-op-matmul-transpose-b-benchmark.html +++ b/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-matmul-transpose-b-benchmark.json

            2025-07-27 17:19:21 UTC

            +

            deeplearning/dl-op-matmul-transpose-b-benchmark.json

            2025-07-27 17:28:46 UTC

            dl-op-matmul-transpose-b-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_SOFTMAXEXPSUMDIV_SCALAR0.005660.00566121,646
            diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-op-tosa-transpose-benchmark.html b/site/deeplearning/dl-op-tosa-transpose-benchmark.html similarity index 98% rename from site/benchmarks/2025-07-27/deeplearning/dl-op-tosa-transpose-benchmark.html rename to site/deeplearning/dl-op-tosa-transpose-benchmark.html index b12b8692..1bc2f78b 100644 --- a/site/benchmarks/2025-07-27/deeplearning/dl-op-tosa-transpose-benchmark.html +++ b/site/deeplearning/dl-op-tosa-transpose-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-tosa-transpose-benchmark.json

            2025-07-27 17:19:21 UTC

            +

            deeplearning/dl-op-tosa-transpose-benchmark.json

            2025-07-27 17:28:46 UTC

            dl-op-tosa-transpose-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_MATMUL_TRANSPOSE_B/scalar_O0/iterations:51.05e+031.05e+035
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html similarity index 99% rename from site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html rename to site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 55bec310..b3738044 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:19:21 UTC

            +

            imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:28:46 UTC

            AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_TRANSPOSE_2D/scalar_O0/iterations:526.421.45
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html similarity index 99% rename from site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html rename to site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 8d3fe97d..b3a377e2 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:19:21 UTC

            +

            imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:28:46 UTC

            AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.884.88143
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html similarity index 99% rename from site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html rename to site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index e9b21b02..1e1db228 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:19:21 UTC

            +

            imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:28:46 UTC

            AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.884.88143
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html similarity index 99% rename from site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html rename to site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 171587c3..ada192b7 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:19:21 UTC

            +

            imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:28:46 UTC

            AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.511.561
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html similarity index 99% rename from site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html rename to site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 781a4cd7..b6a481c9 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:19:21 UTC

            +

            imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:28:46 UTC

            AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.611.661
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html similarity index 99% rename from site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html rename to site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 2f65d57f..92e75e95 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:19:21 UTC

            +

            imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:28:46 UTC

            AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.874.87144
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html similarity index 99% rename from site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html rename to site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 5d407ec2..be47d56e 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:19:21 UTC

            +

            imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:28:46 UTC

            AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.94.9144
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html similarity index 99% rename from site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html rename to site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index a6405b18..2b0414cf 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:19:21 UTC

            +

            imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:28:46 UTC

            AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.774.77147
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html similarity index 99% rename from site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html rename to site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 0251a413..4bdfcfcc 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:19:21 UTC

            +

            imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:28:46 UTC

            AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.914.91143
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html similarity index 99% rename from site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html rename to site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index ade74ebb..8bc5f381 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:19:21 UTC

            +

            imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:28:46 UTC

            AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.611.660
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html similarity index 99% rename from site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html rename to site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 15b6442b..08347694 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:19:21 UTC

            +

            imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:28:46 UTC

            AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.511.560
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html similarity index 99% rename from site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html rename to site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index aaf7ff56..cd824737 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:19:21 UTC

            +

            imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:28:46 UTC

            AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/121.521.531
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html similarity index 99% rename from site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html rename to site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index ceadff67..d8641fae 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:19:21 UTC

            +

            imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:28:46 UTC

            AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/121.621.632
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html similarity index 99% rename from site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html rename to site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 065c470e..4f4b88f9 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:19:21 UTC

            +

            imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:28:46 UTC

            AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/134.234.221
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html similarity index 99% rename from site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html rename to site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index e3b1a7f7..a471a566 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:19:21 UTC

            +

            imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:28:46 UTC

            SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/134.334.320
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html similarity index 99% rename from site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html rename to site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 236b0917..dd322dd9 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:19:21 UTC

            +

            imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:28:46 UTC

            SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.744.74148
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html similarity index 99% rename from site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html rename to site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 0c696e1a..d531736b 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:19:21 UTC

            +

            imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:28:46 UTC

            SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.884.88144
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html similarity index 99% rename from site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html rename to site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index be03610a..23f5513b 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:19:21 UTC

            +

            imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:28:46 UTC

            SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.511.560
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html similarity index 99% rename from site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html rename to site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 075f740a..a12b7529 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:19:21 UTC

            +

            imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:28:46 UTC

            SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.611.661
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html similarity index 99% rename from site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html rename to site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 5e449087..4748e797 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:19:21 UTC

            +

            imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:28:46 UTC

            SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/15.035.03139
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html similarity index 99% rename from site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html rename to site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 0e176b0d..ef60679d 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:19:21 UTC

            +

            imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:28:46 UTC

            SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.894.89143
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html similarity index 99% rename from site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html rename to site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index fd4d2fc2..9e41f9d5 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:19:21 UTC

            +

            imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:28:46 UTC

            SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.894.89143
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html similarity index 99% rename from site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html rename to site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 573135ca..cda42f52 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:19:21 UTC

            +

            imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:28:46 UTC

            SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.874.87144
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html similarity index 99% rename from site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html rename to site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 31ffa9b2..87dc0601 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:19:21 UTC

            +

            imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:28:46 UTC

            SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.511.561
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html similarity index 99% rename from site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html rename to site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 0297449e..900d0442 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:19:21 UTC

            +

            imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:28:46 UTC

            SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.611.661
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html similarity index 99% rename from site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html rename to site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index ab6f0195..aab8e68c 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:19:21 UTC

            +

            imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:28:46 UTC

            SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/121.621.631
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html similarity index 99% rename from site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html rename to site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 8c8e3c06..a207f8f2 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:19:21 UTC

            +

            imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:28:46 UTC

            SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/121.521.532
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html similarity index 99% rename from site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html rename to site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 2b621846..87a3e345 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:19:21 UTC

            +

            imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:28:46 UTC

            SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/141.541.517
            diff --git a/site/benchmarks/2025-07-27/vectorization/vectorization_matrix.html b/site/vectorization/vectorization_matrix.html similarity index 98% rename from site/benchmarks/2025-07-27/vectorization/vectorization_matrix.html rename to site/vectorization/vectorization_matrix.html index d857886c..ff4c77cb 100644 --- a/site/benchmarks/2025-07-27/vectorization/vectorization_matrix.html +++ b/site/vectorization/vectorization_matrix.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            vectorization/vectorization_matrix.json

            2025-07-27 17:19:21 UTC

            +

            vectorization/vectorization_matrix.json

            2025-07-27 17:28:46 UTC

            vectorization_matrix.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/134.234.220
            From 747c01564b9fd3515d28209766d1aae1034159e9 Mon Sep 17 00:00:00 2001 From: LIUQyou <1343451020@qq.com> Date: Sun, 27 Jul 2025 17:54:10 +0000 Subject: [PATCH 39/52] test --- .github/workflows/bench.yml | 14 ++++++-------- .../2025-07-27/benchmarks/2025-07-27/index.html | 14 ++++++++++++++ .../deeplearning/dl-layer-ffn-benchmark.html | 2 +- .../deeplearning/dl-layer-rmsnorm-benchmark.html | 2 +- .../dl-layer-selfattention-benchmark.html | 2 +- .../deeplearning/dl-model-lenet-benchmark.html | 2 +- .../dl-model-mobilenetv3-benchmark.html | 2 +- .../deeplearning/dl-model-resnet18-benchmark.html | 2 +- .../deeplearning/dl-model-tinyllama-benchmark.html | 2 +- .../deeplearning/dl-model-whisper-benchmark.html | 2 +- .../dl-op-linalg-arithaddf-benchmark.html | 2 +- .../dl-op-linalg-arithdivf-benchmark.html | 2 +- .../dl-op-linalg-arithmulf-benchmark.html | 2 +- .../dl-op-linalg-arithnegf-benchmark.html | 2 +- .../dl-op-linalg-arithsubf-benchmark.html | 2 +- .../dl-op-linalg-batch-matmul-benchmark.html | 2 +- .../dl-op-linalg-conv2d-nchw-fchw-benchmark.html | 2 +- .../dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html | 2 +- .../dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html | 2 +- ...inalg-depthwise-conv-2d-nhwc-hwc-benchmark.html | 2 +- .../dl-op-linalg-mathexp-benchmark.html | 2 +- .../dl-op-linalg-mathfpow-benchmark.html | 2 +- .../dl-op-linalg-mathrsqrt-benchmark.html | 2 +- .../dl-op-linalg-matmul-benchmark.html | 2 +- .../dl-op-linalg-pooling-nhwc-sum-benchmark.html | 2 +- .../dl-op-linalg-reduceaddf-benchmark.html | 2 +- .../dl-op-linalg-reducemaxf-benchmark.html | 2 +- ...dl-op-linalg-softmax-exp-sum-div-benchmark.html | 2 +- .../dl-op-matmul-transpose-b-benchmark.html | 2 +- .../dl-op-tosa-transpose-benchmark.html | 2 +- ...n_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ..._random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...n_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ..._random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- 
...n_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ..._random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...n_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ..._random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...n_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ..._random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...n_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ..._random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...n_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ..._random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...n_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ..._random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...n_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ..._random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...n_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ..._random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...n_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ..._random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...n_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ..._random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...n_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ..._random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...n_random3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ..._random3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- .../vectorization/vectorization_matrix.html | 2 +- 59 files changed, 77 insertions(+), 65 deletions(-) create mode 100644 site/benchmarks/2025-07-27/benchmarks/2025-07-27/index.html rename site/{ => benchmarks/2025-07-27}/deeplearning/dl-layer-ffn-benchmark.html (98%) rename site/{ => benchmarks/2025-07-27}/deeplearning/dl-layer-rmsnorm-benchmark.html (98%) rename site/{ => benchmarks/2025-07-27}/deeplearning/dl-layer-selfattention-benchmark.html (98%) rename site/{ => benchmarks/2025-07-27}/deeplearning/dl-model-lenet-benchmark.html (98%) rename site/{ => 
benchmarks/2025-07-27}/deeplearning/dl-model-mobilenetv3-benchmark.html (98%) rename site/{ => benchmarks/2025-07-27}/deeplearning/dl-model-resnet18-benchmark.html (98%) rename site/{ => benchmarks/2025-07-27}/deeplearning/dl-model-tinyllama-benchmark.html (98%) rename site/{ => benchmarks/2025-07-27}/deeplearning/dl-model-whisper-benchmark.html (98%) rename site/{ => benchmarks/2025-07-27}/deeplearning/dl-op-linalg-arithaddf-benchmark.html (98%) rename site/{ => benchmarks/2025-07-27}/deeplearning/dl-op-linalg-arithdivf-benchmark.html (98%) rename site/{ => benchmarks/2025-07-27}/deeplearning/dl-op-linalg-arithmulf-benchmark.html (98%) rename site/{ => benchmarks/2025-07-27}/deeplearning/dl-op-linalg-arithnegf-benchmark.html (98%) rename site/{ => benchmarks/2025-07-27}/deeplearning/dl-op-linalg-arithsubf-benchmark.html (98%) rename site/{ => benchmarks/2025-07-27}/deeplearning/dl-op-linalg-batch-matmul-benchmark.html (98%) rename site/{ => benchmarks/2025-07-27}/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html (97%) rename site/{ => benchmarks/2025-07-27}/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html (98%) rename site/{ => benchmarks/2025-07-27}/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html (97%) rename site/{ => benchmarks/2025-07-27}/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html (97%) rename site/{ => benchmarks/2025-07-27}/deeplearning/dl-op-linalg-mathexp-benchmark.html (98%) rename site/{ => benchmarks/2025-07-27}/deeplearning/dl-op-linalg-mathfpow-benchmark.html (98%) rename site/{ => benchmarks/2025-07-27}/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html (98%) rename site/{ => benchmarks/2025-07-27}/deeplearning/dl-op-linalg-matmul-benchmark.html (98%) rename site/{ => benchmarks/2025-07-27}/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html (97%) rename site/{ => benchmarks/2025-07-27}/deeplearning/dl-op-linalg-reduceaddf-benchmark.html (96%) rename site/{ => 
benchmarks/2025-07-27}/deeplearning/dl-op-linalg-reducemaxf-benchmark.html (96%) rename site/{ => benchmarks/2025-07-27}/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html (97%) rename site/{ => benchmarks/2025-07-27}/deeplearning/dl-op-matmul-transpose-b-benchmark.html (98%) rename site/{ => benchmarks/2025-07-27}/deeplearning/dl-op-tosa-transpose-benchmark.html (98%) rename site/{ => benchmarks/2025-07-27}/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html (99%) rename site/{ => benchmarks/2025-07-27}/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html (99%) rename site/{ => benchmarks/2025-07-27}/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html (99%) rename site/{ => benchmarks/2025-07-27}/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html (99%) rename site/{ => benchmarks/2025-07-27}/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html (99%) rename site/{ => benchmarks/2025-07-27}/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html (99%) rename site/{ => benchmarks/2025-07-27}/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html (99%) rename site/{ => benchmarks/2025-07-27}/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html (99%) rename site/{ => benchmarks/2025-07-27}/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html (99%) rename site/{ => benchmarks/2025-07-27}/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html (99%) rename site/{ => benchmarks/2025-07-27}/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html (99%) rename site/{ => 
benchmarks/2025-07-27}/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html (99%) rename site/{ => benchmarks/2025-07-27}/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html (99%) rename site/{ => benchmarks/2025-07-27}/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html (99%) rename site/{ => benchmarks/2025-07-27}/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html (99%) rename site/{ => benchmarks/2025-07-27}/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html (99%) rename site/{ => benchmarks/2025-07-27}/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html (99%) rename site/{ => benchmarks/2025-07-27}/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html (99%) rename site/{ => benchmarks/2025-07-27}/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html (99%) rename site/{ => benchmarks/2025-07-27}/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html (99%) rename site/{ => benchmarks/2025-07-27}/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html (99%) rename site/{ => benchmarks/2025-07-27}/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html (99%) rename site/{ => benchmarks/2025-07-27}/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html (99%) rename site/{ => benchmarks/2025-07-27}/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html (99%) rename site/{ => benchmarks/2025-07-27}/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html (99%) rename site/{ => 
benchmarks/2025-07-27}/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html (99%) rename site/{ => benchmarks/2025-07-27}/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html (99%) rename site/{ => benchmarks/2025-07-27}/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html (99%) rename site/{ => benchmarks/2025-07-27}/vectorization/vectorization_matrix.html (98%) diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml index a5c70af6..1215ba93 100755 --- a/.github/workflows/bench.yml +++ b/.github/workflows/bench.yml @@ -41,7 +41,7 @@ jobs: run: | bench_date=$(date +'%Y-%m-%d') echo "BENCH_DATE=$bench_date" >> "$GITHUB_ENV" - echo "BENCH_DIR=$HOME/buddy-complier-workspace/buddy-benchmark/site/benchmarks/$bench_date" >> "$GITHUB_ENV" + echo "BENCH_DIR=$HOME/buddy-complier-workspace/buddy-benchmark/site/benchmarks/$bench_date/${GITHUB_SHA}" >> "$GITHUB_ENV" # ------------------------------------------------------------ # 3-5) (UNCHANGED) upload raw logs, convert to HTML, deploy Pages @@ -57,8 +57,8 @@ jobs: working-directory: /home/quliu/buddy-complier-workspace/buddy-benchmark run: | rm -rf site - run_root="${{ env.BENCH_DIR }}" - python3 scripts/logs2html.py test_result $run_root/ + mkdir -p "${{ env.BENCH_DIR }}" + python3 scripts/logs2html.py test_result "${{ env.BENCH_DIR }}/" - name: Update benchmarks/latest redirect working-directory: /home/quliu/buddy-complier-workspace/buddy-benchmark/site/benchmarks @@ -110,9 +110,7 @@ jobs: with: personal_token: ${{ secrets.BUDDY_SITE_PAT }} external_repository: buddy-compiler/buddy-compiler.github.io - publish_dir: "${{ env.BENCH_DIR }}" - destination_dir: benchmarks/${{ github.sha }} + publish_dir: /home/quliu/buddy-complier-workspace/buddy-benchmark/site # <- root of generated site publish_branch: master - keep_files: true - enable_jekyll: true - + keep_files: true # keep earlier runs + 
enable_jekyll: true \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/benchmarks/2025-07-27/index.html b/site/benchmarks/2025-07-27/benchmarks/2025-07-27/index.html new file mode 100644 index 00000000..1641d47e --- /dev/null +++ b/site/benchmarks/2025-07-27/benchmarks/2025-07-27/index.html @@ -0,0 +1,14 @@ + + +

            Buddy-Benchmark results

              + +
            \ No newline at end of file diff --git a/site/deeplearning/dl-layer-ffn-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-layer-ffn-benchmark.html similarity index 98% rename from site/deeplearning/dl-layer-ffn-benchmark.html rename to site/benchmarks/2025-07-27/deeplearning/dl-layer-ffn-benchmark.html index 976d8109..fe2e3761 100644 --- a/site/deeplearning/dl-layer-ffn-benchmark.html +++ b/site/benchmarks/2025-07-27/deeplearning/dl-layer-ffn-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-layer-ffn-benchmark.json

            2025-07-27 17:28:46 UTC

            +

            deeplearning/dl-layer-ffn-benchmark.json

            2025-07-27 17:34:50 UTC

            dl-layer-ffn-benchmark.json

            NameTime (ns)CPU (ns)Iterations
            MLIR_MatMul/118.818.837,302,822
            diff --git a/site/deeplearning/dl-layer-rmsnorm-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-layer-rmsnorm-benchmark.html similarity index 98% rename from site/deeplearning/dl-layer-rmsnorm-benchmark.html rename to site/benchmarks/2025-07-27/deeplearning/dl-layer-rmsnorm-benchmark.html index 60d0a2b6..616fd4f0 100644 --- a/site/deeplearning/dl-layer-rmsnorm-benchmark.html +++ b/site/benchmarks/2025-07-27/deeplearning/dl-layer-rmsnorm-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-layer-rmsnorm-benchmark.json

            2025-07-27 17:28:46 UTC

            +

            deeplearning/dl-layer-rmsnorm-benchmark.json

            2025-07-27 17:34:50 UTC

            dl-layer-rmsnorm-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_LAYER_FFN/Scalar0.06540.065410,762
            diff --git a/site/deeplearning/dl-layer-selfattention-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-layer-selfattention-benchmark.html similarity index 98% rename from site/deeplearning/dl-layer-selfattention-benchmark.html rename to site/benchmarks/2025-07-27/deeplearning/dl-layer-selfattention-benchmark.html index ed2d6177..f230f564 100644 --- a/site/deeplearning/dl-layer-selfattention-benchmark.html +++ b/site/benchmarks/2025-07-27/deeplearning/dl-layer-selfattention-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-layer-selfattention-benchmark.json

            2025-07-27 17:28:46 UTC

            +

            deeplearning/dl-layer-selfattention-benchmark.json

            2025-07-27 17:34:50 UTC

            dl-layer-selfattention-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_LAYER_RMSNORM/Scalar0.001960.00196356,202
            diff --git a/site/deeplearning/dl-model-lenet-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-model-lenet-benchmark.html similarity index 98% rename from site/deeplearning/dl-model-lenet-benchmark.html rename to site/benchmarks/2025-07-27/deeplearning/dl-model-lenet-benchmark.html index d87fe823..96d31be3 100644 --- a/site/deeplearning/dl-model-lenet-benchmark.html +++ b/site/benchmarks/2025-07-27/deeplearning/dl-model-lenet-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-model-lenet-benchmark.json

            2025-07-27 17:28:46 UTC

            +

            deeplearning/dl-model-lenet-benchmark.json

            2025-07-27 17:34:50 UTC

            dl-model-lenet-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_LAYER_ATTENTION/Scalar4.694.69149
            diff --git a/site/deeplearning/dl-model-mobilenetv3-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-model-mobilenetv3-benchmark.html similarity index 98% rename from site/deeplearning/dl-model-mobilenetv3-benchmark.html rename to site/benchmarks/2025-07-27/deeplearning/dl-model-mobilenetv3-benchmark.html index 17109311..85465148 100644 --- a/site/deeplearning/dl-model-mobilenetv3-benchmark.html +++ b/site/benchmarks/2025-07-27/deeplearning/dl-model-mobilenetv3-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-model-mobilenetv3-benchmark.json

            2025-07-27 17:28:46 UTC

            +

            deeplearning/dl-model-mobilenetv3-benchmark.json

            2025-07-27 17:34:50 UTC

            dl-model-mobilenetv3-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_MODEL_LENET/Auto_Vectorization0.1650.1654,304
            diff --git a/site/deeplearning/dl-model-resnet18-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-model-resnet18-benchmark.html similarity index 98% rename from site/deeplearning/dl-model-resnet18-benchmark.html rename to site/benchmarks/2025-07-27/deeplearning/dl-model-resnet18-benchmark.html index e05e0d2c..ce2a0567 100644 --- a/site/deeplearning/dl-model-resnet18-benchmark.html +++ b/site/benchmarks/2025-07-27/deeplearning/dl-model-resnet18-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-model-resnet18-benchmark.json

            2025-07-27 17:28:46 UTC

            +

            deeplearning/dl-model-resnet18-benchmark.json

            2025-07-27 17:34:50 UTC

            dl-model-resnet18-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_MobileNet_V3/BM_MobileNet_V3_scalar37.137.119
            diff --git a/site/deeplearning/dl-model-tinyllama-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-model-tinyllama-benchmark.html similarity index 98% rename from site/deeplearning/dl-model-tinyllama-benchmark.html rename to site/benchmarks/2025-07-27/deeplearning/dl-model-tinyllama-benchmark.html index dce6e69e..4b0a8a97 100644 --- a/site/deeplearning/dl-model-tinyllama-benchmark.html +++ b/site/benchmarks/2025-07-27/deeplearning/dl-model-tinyllama-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-model-tinyllama-benchmark.json

            2025-07-27 17:28:46 UTC

            +

            deeplearning/dl-model-tinyllama-benchmark.json

            2025-07-27 17:34:50 UTC

            dl-model-tinyllama-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_MODEL_Resnet18/Auto_Vectorization7317231
            diff --git a/site/deeplearning/dl-model-whisper-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-model-whisper-benchmark.html similarity index 98% rename from site/deeplearning/dl-model-whisper-benchmark.html rename to site/benchmarks/2025-07-27/deeplearning/dl-model-whisper-benchmark.html index 3b15b9a3..2cab9076 100644 --- a/site/deeplearning/dl-model-whisper-benchmark.html +++ b/site/benchmarks/2025-07-27/deeplearning/dl-model-whisper-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-model-whisper-benchmark.json

            2025-07-27 17:28:46 UTC

            +

            deeplearning/dl-model-whisper-benchmark.json

            2025-07-27 17:34:50 UTC

            dl-model-whisper-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_MODEL_TINYLLAMA/scalar1.39e+051.39e+051
            diff --git a/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithaddf-benchmark.html similarity index 98% rename from site/deeplearning/dl-op-linalg-arithaddf-benchmark.html rename to site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithaddf-benchmark.html index e840b644..274ab7a9 100644 --- a/site/deeplearning/dl-op-linalg-arithaddf-benchmark.html +++ b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithaddf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-arithaddf-benchmark.json

            2025-07-27 17:28:46 UTC

            +

            deeplearning/dl-op-linalg-arithaddf-benchmark.json

            2025-07-27 17:34:50 UTC

            dl-op-linalg-arithaddf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_MODEL_Whisper/Auto_Vectorization8e+048e+041
            diff --git a/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithdivf-benchmark.html similarity index 98% rename from site/deeplearning/dl-op-linalg-arithdivf-benchmark.html rename to site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithdivf-benchmark.html index 7142ecc3..1478e86e 100644 --- a/site/deeplearning/dl-op-linalg-arithdivf-benchmark.html +++ b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithdivf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-arithdivf-benchmark.json

            2025-07-27 17:28:46 UTC

            +

            deeplearning/dl-op-linalg-arithdivf-benchmark.json

            2025-07-27 17:34:50 UTC

            dl-op-linalg-arithdivf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_ADDF_SCALAR0.02950.029523,451
            diff --git a/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithmulf-benchmark.html similarity index 98% rename from site/deeplearning/dl-op-linalg-arithmulf-benchmark.html rename to site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithmulf-benchmark.html index 3cc037cb..e39fcbab 100644 --- a/site/deeplearning/dl-op-linalg-arithmulf-benchmark.html +++ b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithmulf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-arithmulf-benchmark.json

            2025-07-27 17:28:46 UTC

            +

            deeplearning/dl-op-linalg-arithmulf-benchmark.json

            2025-07-27 17:34:50 UTC

            dl-op-linalg-arithmulf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_DIVF_SCALAR0.02980.029823,358
            diff --git a/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithnegf-benchmark.html similarity index 98% rename from site/deeplearning/dl-op-linalg-arithnegf-benchmark.html rename to site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithnegf-benchmark.html index fd2b7d08..9de6b58e 100644 --- a/site/deeplearning/dl-op-linalg-arithnegf-benchmark.html +++ b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithnegf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-arithnegf-benchmark.json

            2025-07-27 17:28:46 UTC

            +

            deeplearning/dl-op-linalg-arithnegf-benchmark.json

            2025-07-27 17:34:50 UTC

            dl-op-linalg-arithnegf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_MULF_SCALAR0.02980.029823,441
            diff --git a/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithsubf-benchmark.html similarity index 98% rename from site/deeplearning/dl-op-linalg-arithsubf-benchmark.html rename to site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithsubf-benchmark.html index fa32b3ab..69fc9ec5 100644 --- a/site/deeplearning/dl-op-linalg-arithsubf-benchmark.html +++ b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithsubf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-arithsubf-benchmark.json

            2025-07-27 17:28:46 UTC

            +

            deeplearning/dl-op-linalg-arithsubf-benchmark.json

            2025-07-27 17:34:50 UTC

            dl-op-linalg-arithsubf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_NEGF_SCALAR0.02250.022530,969
            diff --git a/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-batch-matmul-benchmark.html similarity index 98% rename from site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html rename to site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-batch-matmul-benchmark.html index 75031c07..11ecfcd3 100644 --- a/site/deeplearning/dl-op-linalg-batch-matmul-benchmark.html +++ b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-batch-matmul-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-batch-matmul-benchmark.json

            2025-07-27 17:28:46 UTC

            +

            deeplearning/dl-op-linalg-batch-matmul-benchmark.json

            2025-07-27 17:34:50 UTC

            dl-op-linalg-batch-matmul-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_SUBF_SCALAR0.02940.029423,509
            diff --git a/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html similarity index 97% rename from site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html rename to site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html index e27c54e6..0c22cbb9 100644 --- a/site/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html +++ b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.json

            2025-07-27 17:28:46 UTC

            +

            deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.json

            2025-07-27 17:34:50 UTC

            dl-op-linalg-conv2d-nchw-fchw-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_BATCH_MATMUL/Scalar/iterations:13.54e+033.54e+031
            diff --git a/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html similarity index 98% rename from site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html rename to site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html index 31c90b55..b9b948b6 100644 --- a/site/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html +++ b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json

            2025-07-27 17:28:46 UTC

            +

            deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json

            2025-07-27 17:34:50 UTC

            dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_Conv2DNchwFchw_SCALAR2832832
            diff --git a/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html similarity index 97% rename from site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html rename to site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html index b31b4bf4..c116bae2 100644 --- a/site/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html +++ b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json

            2025-07-27 17:28:46 UTC

            +

            deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json

            2025-07-27 17:34:50 UTC

            dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_CONV_2D_NHWC_FHWC/scalar/iterations:572.372.35
            diff --git a/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html similarity index 97% rename from site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html rename to site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html index 9c8b3b57..01603307 100644 --- a/site/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html +++ b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json

            2025-07-27 17:28:46 UTC

            +

            deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json

            2025-07-27 17:34:50 UTC

            dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_CONV_2D_NHWC_HWCF_SCALAR32.332.322
            diff --git a/site/deeplearning/dl-op-linalg-mathexp-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-mathexp-benchmark.html similarity index 98% rename from site/deeplearning/dl-op-linalg-mathexp-benchmark.html rename to site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-mathexp-benchmark.html index c65938cf..317ff811 100644 --- a/site/deeplearning/dl-op-linalg-mathexp-benchmark.html +++ b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-mathexp-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-mathexp-benchmark.json

            2025-07-27 17:28:46 UTC

            +

            deeplearning/dl-op-linalg-mathexp-benchmark.json

            2025-07-27 17:34:50 UTC

            dl-op-linalg-mathexp-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/scalar/iterations:54.254.255
            diff --git a/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-mathfpow-benchmark.html similarity index 98% rename from site/deeplearning/dl-op-linalg-mathfpow-benchmark.html rename to site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-mathfpow-benchmark.html index 77e37dbd..98c22f5c 100644 --- a/site/deeplearning/dl-op-linalg-mathfpow-benchmark.html +++ b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-mathfpow-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-mathfpow-benchmark.json

            2025-07-27 17:28:46 UTC

            +

            deeplearning/dl-op-linalg-mathfpow-benchmark.json

            2025-07-27 17:34:50 UTC

            dl-op-linalg-mathfpow-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_EXP_SCALAR0.04560.045615,225
            diff --git a/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html similarity index 98% rename from site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html rename to site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html index bc08f294..6ca3052b 100644 --- a/site/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html +++ b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-mathrsqrt-benchmark.json

            2025-07-27 17:28:46 UTC

            +

            deeplearning/dl-op-linalg-mathrsqrt-benchmark.json

            2025-07-27 17:34:50 UTC

            dl-op-linalg-mathrsqrt-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_FPOW_SCALAR0.08410.08418,255
            diff --git a/site/deeplearning/dl-op-linalg-matmul-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-matmul-benchmark.html similarity index 98% rename from site/deeplearning/dl-op-linalg-matmul-benchmark.html rename to site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-matmul-benchmark.html index 00f1cc85..38efb3f7 100644 --- a/site/deeplearning/dl-op-linalg-matmul-benchmark.html +++ b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-matmul-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-matmul-benchmark.json

            2025-07-27 17:28:46 UTC

            +

            deeplearning/dl-op-linalg-matmul-benchmark.json

            2025-07-27 17:34:50 UTC

            dl-op-linalg-matmul-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_RSQRT_SCALAR0.07280.07289,537
            diff --git a/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html similarity index 97% rename from site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html rename to site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html index ac5b6b8d..3f34700d 100644 --- a/site/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html +++ b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.json

            2025-07-27 17:28:46 UTC

            +

            deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.json

            2025-07-27 17:34:50 UTC

            dl-op-linalg-pooling-nhwc-sum-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_MATMUL/scalar_O0/iterations:13.93e+033.93e+031
            diff --git a/site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-reduceaddf-benchmark.html similarity index 96% rename from site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html rename to site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-reduceaddf-benchmark.html index b5a83174..44c0f6a3 100644 --- a/site/deeplearning/dl-op-linalg-reduceaddf-benchmark.html +++ b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-reduceaddf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-reduceaddf-benchmark.json

            2025-07-27 17:28:46 UTC

            +

            deeplearning/dl-op-linalg-reduceaddf-benchmark.json

            2025-07-27 17:34:50 UTC

            ⚠ FAILED: JSON parse error: Expecting value
            Console output
            2025-07-27T14:27:39+00:00
            diff --git a/site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-reducemaxf-benchmark.html
            similarity index 96%
            rename from site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html
            rename to site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-reducemaxf-benchmark.html
            index 4c9779f9..5c4d33d2 100644
            --- a/site/deeplearning/dl-op-linalg-reducemaxf-benchmark.html
            +++ b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-reducemaxf-benchmark.html
            @@ -10,7 +10,7 @@
             .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem}
             
             
            -

            deeplearning/dl-op-linalg-reducemaxf-benchmark.json

            2025-07-27 17:28:46 UTC

            +

            deeplearning/dl-op-linalg-reducemaxf-benchmark.json

            2025-07-27 17:34:50 UTC

            ⚠ FAILED: JSON parse error: Expecting value
            Console output
            2025-07-27T14:27:39+00:00
            diff --git a/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html
            similarity index 97%
            rename from site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html
            rename to site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html
            index 76dc1ed3..8b761020 100644
            --- a/site/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html
            +++ b/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html
            @@ -10,7 +10,7 @@
             .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem}
             
             
            -

            deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.json

            2025-07-27 17:28:46 UTC

            +

            deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.json

            2025-07-27 17:34:50 UTC

            dl-op-linalg-softmax-exp-sum-div-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_POOLING_NHWC_SUM_SCALAR0.2330.2333,002
            diff --git a/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-op-matmul-transpose-b-benchmark.html similarity index 98% rename from site/deeplearning/dl-op-matmul-transpose-b-benchmark.html rename to site/benchmarks/2025-07-27/deeplearning/dl-op-matmul-transpose-b-benchmark.html index 4845e3b2..7342f05e 100644 --- a/site/deeplearning/dl-op-matmul-transpose-b-benchmark.html +++ b/site/benchmarks/2025-07-27/deeplearning/dl-op-matmul-transpose-b-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-matmul-transpose-b-benchmark.json

            2025-07-27 17:28:46 UTC

            +

            deeplearning/dl-op-matmul-transpose-b-benchmark.json

            2025-07-27 17:34:50 UTC

            dl-op-matmul-transpose-b-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_SOFTMAXEXPSUMDIV_SCALAR0.005660.00566121,646
            diff --git a/site/deeplearning/dl-op-tosa-transpose-benchmark.html b/site/benchmarks/2025-07-27/deeplearning/dl-op-tosa-transpose-benchmark.html similarity index 98% rename from site/deeplearning/dl-op-tosa-transpose-benchmark.html rename to site/benchmarks/2025-07-27/deeplearning/dl-op-tosa-transpose-benchmark.html index 1bc2f78b..e0f178c0 100644 --- a/site/deeplearning/dl-op-tosa-transpose-benchmark.html +++ b/site/benchmarks/2025-07-27/deeplearning/dl-op-tosa-transpose-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-tosa-transpose-benchmark.json

            2025-07-27 17:28:46 UTC

            +

            deeplearning/dl-op-tosa-transpose-benchmark.json

            2025-07-27 17:34:50 UTC

            dl-op-tosa-transpose-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_MATMUL_TRANSPOSE_B/scalar_O0/iterations:51.05e+031.05e+035
            diff --git a/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html similarity index 99% rename from site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html rename to site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index b3738044..05643bac 100644 --- a/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:28:46 UTC

            +

            imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:34:50 UTC

            AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_TRANSPOSE_2D/scalar_O0/iterations:526.421.45
            diff --git a/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html similarity index 99% rename from site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html rename to site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index b3a377e2..176dddb6 100644 --- a/site/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:28:46 UTC

            +

            imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:34:50 UTC

            AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.884.88143
            diff --git a/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html similarity index 99% rename from site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html rename to site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 1e1db228..5f64aa3d 100644 --- a/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:28:46 UTC

            +

            imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:34:50 UTC

            AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.884.88143
            diff --git a/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html similarity index 99% rename from site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html rename to site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index ada192b7..3b7bd82b 100644 --- a/site/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:28:46 UTC

            +

            imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:34:50 UTC

            AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.511.561
            diff --git a/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html similarity index 99% rename from site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html rename to site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index b6a481c9..cec715cb 100644 --- a/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:28:46 UTC

            +

            imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:34:50 UTC

            AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.611.661
            diff --git a/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html similarity index 99% rename from site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html rename to site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 92e75e95..644ec7dc 100644 --- a/site/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:28:46 UTC

            +

            imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:34:50 UTC

            AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.874.87144
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html similarity index 99% rename from site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html rename to site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index be47d56e..184ee9ec 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:28:46 UTC

            +

            imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:34:50 UTC

            AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.94.9144
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html similarity index 99% rename from site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html rename to site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 2b0414cf..f8764c60 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:28:46 UTC

            +

            imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:34:50 UTC

            AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.774.77147
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html similarity index 99% rename from site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html rename to site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 4bdfcfcc..73530d2a 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:28:46 UTC

            +

            imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:34:50 UTC

            AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.914.91143
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html similarity index 99% rename from site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html rename to site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 8bc5f381..90b72d8f 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:28:46 UTC

            +

            imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:34:50 UTC

            AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.611.660
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html similarity index 99% rename from site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html rename to site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 08347694..1e369505 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:28:46 UTC

            +

            imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:34:50 UTC

            AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.511.560
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html similarity index 99% rename from site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html rename to site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index cd824737..02422a27 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:28:46 UTC

            +

            imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:34:50 UTC

            AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/121.521.531
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html similarity index 99% rename from site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html rename to site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index d8641fae..27effb56 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:28:46 UTC

            +

            imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:34:50 UTC

            AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/121.621.632
            diff --git a/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html similarity index 99% rename from site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html rename to site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 4f4b88f9..5f3ec226 100644 --- a/site/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:28:46 UTC

            +

            imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:34:50 UTC

            AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/134.234.221
            diff --git a/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html similarity index 99% rename from site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html rename to site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index a471a566..474be48b 100644 --- a/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:28:46 UTC

            +

            imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:34:50 UTC

            SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/134.334.320
            diff --git a/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html similarity index 99% rename from site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html rename to site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index dd322dd9..1fb9bbf6 100644 --- a/site/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:28:46 UTC

            +

            imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:34:50 UTC

            SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.744.74148
            diff --git a/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html similarity index 99% rename from site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html rename to site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index d531736b..6b7333b2 100644 --- a/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:28:46 UTC

            +

            imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:34:50 UTC

            SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.884.88144
            diff --git a/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html similarity index 99% rename from site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html rename to site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 23f5513b..e416de93 100644 --- a/site/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:28:46 UTC

            +

            imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:34:50 UTC

            SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.511.560
            diff --git a/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html similarity index 99% rename from site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html rename to site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index a12b7529..c7693c10 100644 --- a/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:28:46 UTC

            +

            imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:34:50 UTC

            SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.611.661
            diff --git a/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html similarity index 99% rename from site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html rename to site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 4748e797..2cf54f46 100644 --- a/site/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:28:46 UTC

            +

            imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:34:50 UTC

            SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/15.035.03139
            diff --git a/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html similarity index 99% rename from site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html rename to site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index ef60679d..c50a4c77 100644 --- a/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:28:46 UTC

            +

            imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:34:50 UTC

            SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.894.89143
            diff --git a/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html similarity index 99% rename from site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html rename to site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 9e41f9d5..43db2e50 100644 --- a/site/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:28:46 UTC

            +

            imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:34:50 UTC

            SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.894.89143
            diff --git a/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html similarity index 99% rename from site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html rename to site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index cda42f52..4306f79a 100644 --- a/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:28:46 UTC

            +

            imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:34:50 UTC

            SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.874.87144
            diff --git a/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html similarity index 99% rename from site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html rename to site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 87dc0601..8bd12d57 100644 --- a/site/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:28:46 UTC

            +

            imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:34:50 UTC

            SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.511.561
            diff --git a/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html similarity index 99% rename from site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html rename to site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 900d0442..c9fae1e0 100644 --- a/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:28:46 UTC

            +

            imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:34:50 UTC

            SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.611.661
            diff --git a/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html similarity index 99% rename from site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html rename to site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index aab8e68c..79618f2f 100644 --- a/site/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:28:46 UTC

            +

            imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:34:50 UTC

            SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/121.621.631
            diff --git a/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html similarity index 99% rename from site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html rename to site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index a207f8f2..513dfa5a 100644 --- a/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:28:46 UTC

            +

            imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:34:50 UTC

            SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/121.521.532
            diff --git a/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html similarity index 99% rename from site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html rename to site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 87a3e345..53382fe3 100644 --- a/site/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:28:46 UTC

            +

            imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:34:50 UTC

            SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/141.541.517
            diff --git a/site/vectorization/vectorization_matrix.html b/site/benchmarks/2025-07-27/vectorization/vectorization_matrix.html similarity index 98% rename from site/vectorization/vectorization_matrix.html rename to site/benchmarks/2025-07-27/vectorization/vectorization_matrix.html index ff4c77cb..0dc0ca3a 100644 --- a/site/vectorization/vectorization_matrix.html +++ b/site/benchmarks/2025-07-27/vectorization/vectorization_matrix.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            vectorization/vectorization_matrix.json

            2025-07-27 17:28:46 UTC

            +

            vectorization/vectorization_matrix.json

            2025-07-27 17:34:50 UTC

            vectorization_matrix.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/134.234.220
            From 5b340564e7813e5e5bd7f455b7912f8802573358 Mon Sep 17 00:00:00 2001 From: LIUQyou <1343451020@qq.com> Date: Sun, 27 Jul 2025 18:17:03 +0000 Subject: [PATCH 40/52] test --- .github/workflows/bench.yml | 20 +++++++------------ .../benchmarks/2025-07-27/index.html | 0 .../deeplearning/dl-layer-ffn-benchmark.html | 2 +- .../dl-layer-rmsnorm-benchmark.html | 2 +- .../dl-layer-selfattention-benchmark.html | 2 +- .../dl-model-lenet-benchmark.html | 2 +- .../dl-model-mobilenetv3-benchmark.html | 2 +- .../dl-model-resnet18-benchmark.html | 2 +- .../dl-model-tinyllama-benchmark.html | 2 +- .../dl-model-whisper-benchmark.html | 2 +- .../dl-op-linalg-arithaddf-benchmark.html | 2 +- .../dl-op-linalg-arithdivf-benchmark.html | 2 +- .../dl-op-linalg-arithmulf-benchmark.html | 2 +- .../dl-op-linalg-arithnegf-benchmark.html | 2 +- .../dl-op-linalg-arithsubf-benchmark.html | 2 +- .../dl-op-linalg-batch-matmul-benchmark.html | 2 +- ...-op-linalg-conv2d-nchw-fchw-benchmark.html | 2 +- ...-op-linalg-conv2d-nhwc-fhwc-benchmark.html | 2 +- ...-op-linalg-conv2d-nhwc-hwcf-benchmark.html | 2 +- ...-depthwise-conv-2d-nhwc-hwc-benchmark.html | 2 +- .../dl-op-linalg-mathexp-benchmark.html | 2 +- .../dl-op-linalg-mathfpow-benchmark.html | 2 +- .../dl-op-linalg-mathrsqrt-benchmark.html | 2 +- .../dl-op-linalg-matmul-benchmark.html | 2 +- ...-op-linalg-pooling-nhwc-sum-benchmark.html | 2 +- .../dl-op-linalg-reduceaddf-benchmark.html | 2 +- .../dl-op-linalg-reducemaxf-benchmark.html | 2 +- ...-linalg-softmax-exp-sum-div-benchmark.html | 2 +- .../dl-op-matmul-transpose-b-benchmark.html | 2 +- .../dl-op-tosa-transpose-benchmark.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- 
...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 2 +- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 2 +- .../index.html | 0 .../vectorization/vectorization_matrix.html | 2 +- 60 files changed, 64 insertions(+), 70 deletions(-) rename site/benchmarks/2025-07-27/{ => 747c01564b9fd3515d28209766d1aae1034159e9}/benchmarks/2025-07-27/index.html (100%) rename site/benchmarks/2025-07-27/{ => 747c01564b9fd3515d28209766d1aae1034159e9}/deeplearning/dl-layer-ffn-benchmark.html (98%) rename site/benchmarks/2025-07-27/{ => 747c01564b9fd3515d28209766d1aae1034159e9}/deeplearning/dl-layer-rmsnorm-benchmark.html (98%) rename site/benchmarks/2025-07-27/{ => 747c01564b9fd3515d28209766d1aae1034159e9}/deeplearning/dl-layer-selfattention-benchmark.html (98%) rename site/benchmarks/2025-07-27/{ => 747c01564b9fd3515d28209766d1aae1034159e9}/deeplearning/dl-model-lenet-benchmark.html (98%) rename site/benchmarks/2025-07-27/{ => 
747c01564b9fd3515d28209766d1aae1034159e9}/deeplearning/dl-model-mobilenetv3-benchmark.html (98%) rename site/benchmarks/2025-07-27/{ => 747c01564b9fd3515d28209766d1aae1034159e9}/deeplearning/dl-model-resnet18-benchmark.html (98%) rename site/benchmarks/2025-07-27/{ => 747c01564b9fd3515d28209766d1aae1034159e9}/deeplearning/dl-model-tinyllama-benchmark.html (98%) rename site/benchmarks/2025-07-27/{ => 747c01564b9fd3515d28209766d1aae1034159e9}/deeplearning/dl-model-whisper-benchmark.html (98%) rename site/benchmarks/2025-07-27/{ => 747c01564b9fd3515d28209766d1aae1034159e9}/deeplearning/dl-op-linalg-arithaddf-benchmark.html (98%) rename site/benchmarks/2025-07-27/{ => 747c01564b9fd3515d28209766d1aae1034159e9}/deeplearning/dl-op-linalg-arithdivf-benchmark.html (98%) rename site/benchmarks/2025-07-27/{ => 747c01564b9fd3515d28209766d1aae1034159e9}/deeplearning/dl-op-linalg-arithmulf-benchmark.html (98%) rename site/benchmarks/2025-07-27/{ => 747c01564b9fd3515d28209766d1aae1034159e9}/deeplearning/dl-op-linalg-arithnegf-benchmark.html (98%) rename site/benchmarks/2025-07-27/{ => 747c01564b9fd3515d28209766d1aae1034159e9}/deeplearning/dl-op-linalg-arithsubf-benchmark.html (98%) rename site/benchmarks/2025-07-27/{ => 747c01564b9fd3515d28209766d1aae1034159e9}/deeplearning/dl-op-linalg-batch-matmul-benchmark.html (98%) rename site/benchmarks/2025-07-27/{ => 747c01564b9fd3515d28209766d1aae1034159e9}/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html (97%) rename site/benchmarks/2025-07-27/{ => 747c01564b9fd3515d28209766d1aae1034159e9}/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html (98%) rename site/benchmarks/2025-07-27/{ => 747c01564b9fd3515d28209766d1aae1034159e9}/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html (97%) rename site/benchmarks/2025-07-27/{ => 747c01564b9fd3515d28209766d1aae1034159e9}/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html (97%) rename site/benchmarks/2025-07-27/{ => 
747c01564b9fd3515d28209766d1aae1034159e9}/deeplearning/dl-op-linalg-mathexp-benchmark.html (98%) rename site/benchmarks/2025-07-27/{ => 747c01564b9fd3515d28209766d1aae1034159e9}/deeplearning/dl-op-linalg-mathfpow-benchmark.html (98%) rename site/benchmarks/2025-07-27/{ => 747c01564b9fd3515d28209766d1aae1034159e9}/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html (98%) rename site/benchmarks/2025-07-27/{ => 747c01564b9fd3515d28209766d1aae1034159e9}/deeplearning/dl-op-linalg-matmul-benchmark.html (98%) rename site/benchmarks/2025-07-27/{ => 747c01564b9fd3515d28209766d1aae1034159e9}/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html (97%) rename site/benchmarks/2025-07-27/{ => 747c01564b9fd3515d28209766d1aae1034159e9}/deeplearning/dl-op-linalg-reduceaddf-benchmark.html (96%) rename site/benchmarks/2025-07-27/{ => 747c01564b9fd3515d28209766d1aae1034159e9}/deeplearning/dl-op-linalg-reducemaxf-benchmark.html (96%) rename site/benchmarks/2025-07-27/{ => 747c01564b9fd3515d28209766d1aae1034159e9}/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html (97%) rename site/benchmarks/2025-07-27/{ => 747c01564b9fd3515d28209766d1aae1034159e9}/deeplearning/dl-op-matmul-transpose-b-benchmark.html (98%) rename site/benchmarks/2025-07-27/{ => 747c01564b9fd3515d28209766d1aae1034159e9}/deeplearning/dl-op-tosa-transpose-benchmark.html (98%) rename site/benchmarks/2025-07-27/{ => 747c01564b9fd3515d28209766d1aae1034159e9}/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html (99%) rename site/benchmarks/2025-07-27/{ => 747c01564b9fd3515d28209766d1aae1034159e9}/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html (99%) rename site/benchmarks/2025-07-27/{ => 747c01564b9fd3515d28209766d1aae1034159e9}/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html (99%) rename site/benchmarks/2025-07-27/{ => 
747c01564b9fd3515d28209766d1aae1034159e9}/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html (99%) rename site/benchmarks/2025-07-27/{ => 747c01564b9fd3515d28209766d1aae1034159e9}/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html (99%) rename site/benchmarks/2025-07-27/{ => 747c01564b9fd3515d28209766d1aae1034159e9}/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html (99%) rename site/benchmarks/2025-07-27/{ => 747c01564b9fd3515d28209766d1aae1034159e9}/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html (99%) rename site/benchmarks/2025-07-27/{ => 747c01564b9fd3515d28209766d1aae1034159e9}/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html (99%) rename site/benchmarks/2025-07-27/{ => 747c01564b9fd3515d28209766d1aae1034159e9}/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html (99%) rename site/benchmarks/2025-07-27/{ => 747c01564b9fd3515d28209766d1aae1034159e9}/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html (99%) rename site/benchmarks/2025-07-27/{ => 747c01564b9fd3515d28209766d1aae1034159e9}/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html (99%) rename site/benchmarks/2025-07-27/{ => 747c01564b9fd3515d28209766d1aae1034159e9}/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html (99%) rename site/benchmarks/2025-07-27/{ => 747c01564b9fd3515d28209766d1aae1034159e9}/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html (99%) rename site/benchmarks/2025-07-27/{ => 747c01564b9fd3515d28209766d1aae1034159e9}/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html (99%) rename site/benchmarks/2025-07-27/{ => 
747c01564b9fd3515d28209766d1aae1034159e9}/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html (99%) rename site/benchmarks/2025-07-27/{ => 747c01564b9fd3515d28209766d1aae1034159e9}/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html (99%) rename site/benchmarks/2025-07-27/{ => 747c01564b9fd3515d28209766d1aae1034159e9}/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html (99%) rename site/benchmarks/2025-07-27/{ => 747c01564b9fd3515d28209766d1aae1034159e9}/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html (99%) rename site/benchmarks/2025-07-27/{ => 747c01564b9fd3515d28209766d1aae1034159e9}/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html (99%) rename site/benchmarks/2025-07-27/{ => 747c01564b9fd3515d28209766d1aae1034159e9}/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html (99%) rename site/benchmarks/2025-07-27/{ => 747c01564b9fd3515d28209766d1aae1034159e9}/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html (99%) rename site/benchmarks/2025-07-27/{ => 747c01564b9fd3515d28209766d1aae1034159e9}/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html (99%) rename site/benchmarks/2025-07-27/{ => 747c01564b9fd3515d28209766d1aae1034159e9}/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html (99%) rename site/benchmarks/2025-07-27/{ => 747c01564b9fd3515d28209766d1aae1034159e9}/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html (99%) rename site/benchmarks/2025-07-27/{ => 747c01564b9fd3515d28209766d1aae1034159e9}/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html (99%) rename site/benchmarks/2025-07-27/{ => 
747c01564b9fd3515d28209766d1aae1034159e9}/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html (99%) rename site/benchmarks/2025-07-27/{ => 747c01564b9fd3515d28209766d1aae1034159e9}/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html (99%) rename site/benchmarks/2025-07-27/{ => 747c01564b9fd3515d28209766d1aae1034159e9}/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html (99%) rename site/benchmarks/2025-07-27/{ => 747c01564b9fd3515d28209766d1aae1034159e9}/index.html (100%) rename site/benchmarks/2025-07-27/{ => 747c01564b9fd3515d28209766d1aae1034159e9}/vectorization/vectorization_matrix.html (98%) diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml index 1215ba93..07f6ef11 100755 --- a/.github/workflows/bench.yml +++ b/.github/workflows/bench.yml @@ -60,22 +60,16 @@ jobs: mkdir -p "${{ env.BENCH_DIR }}" python3 scripts/logs2html.py test_result "${{ env.BENCH_DIR }}/" - - name: Update benchmarks/latest redirect - working-directory: /home/quliu/buddy-complier-workspace/buddy-benchmark/site/benchmarks + # ------------------------------------------------------------ + # 4) make /benchmarks/ point to the most recent run as well + # ------------------------------------------------------------ + - name: Add top-level benchmarks index + working-directory: /home/quliu/buddy-complier-workspace/buddy-benchmark/site run: | set -e - # -------- pick the most recent dated folder (YYYY-MM-DD) ------------- - latest=$(ls -1d 20*/ | sort -r | head -n1 | tr -d '/') - echo "[Info] newest run is: $latest" - - # -------- rebuild the 'latest' folder with a meta-refresh ------------ - rm -rf latest - mkdir -p latest - cat > latest/index.html < + cat > benchmarks/index.html <<'EOF' + EOF - echo "[Info] benchmarks/latest now points to ../${latest}/" - - name: Upload site artifact uses: actions/upload-pages-artifact@v3 diff --git 
a/site/benchmarks/2025-07-27/benchmarks/2025-07-27/index.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/benchmarks/2025-07-27/index.html similarity index 100% rename from site/benchmarks/2025-07-27/benchmarks/2025-07-27/index.html rename to site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/benchmarks/2025-07-27/index.html diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-layer-ffn-benchmark.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-layer-ffn-benchmark.html similarity index 98% rename from site/benchmarks/2025-07-27/deeplearning/dl-layer-ffn-benchmark.html rename to site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-layer-ffn-benchmark.html index fe2e3761..2a3eee65 100644 --- a/site/benchmarks/2025-07-27/deeplearning/dl-layer-ffn-benchmark.html +++ b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-layer-ffn-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-layer-ffn-benchmark.json

            2025-07-27 17:34:50 UTC

            +

            deeplearning/dl-layer-ffn-benchmark.json

            2025-07-27 17:54:34 UTC

            dl-layer-ffn-benchmark.json

            NameTime (ns)CPU (ns)Iterations
            MLIR_MatMul/118.818.837,302,822
            diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-layer-rmsnorm-benchmark.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-layer-rmsnorm-benchmark.html similarity index 98% rename from site/benchmarks/2025-07-27/deeplearning/dl-layer-rmsnorm-benchmark.html rename to site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-layer-rmsnorm-benchmark.html index 616fd4f0..00796040 100644 --- a/site/benchmarks/2025-07-27/deeplearning/dl-layer-rmsnorm-benchmark.html +++ b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-layer-rmsnorm-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-layer-rmsnorm-benchmark.json

            2025-07-27 17:34:50 UTC

            +

            deeplearning/dl-layer-rmsnorm-benchmark.json

            2025-07-27 17:54:34 UTC

            dl-layer-rmsnorm-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_LAYER_FFN/Scalar0.06540.065410,762
            diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-layer-selfattention-benchmark.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-layer-selfattention-benchmark.html similarity index 98% rename from site/benchmarks/2025-07-27/deeplearning/dl-layer-selfattention-benchmark.html rename to site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-layer-selfattention-benchmark.html index f230f564..bd2db1af 100644 --- a/site/benchmarks/2025-07-27/deeplearning/dl-layer-selfattention-benchmark.html +++ b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-layer-selfattention-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-layer-selfattention-benchmark.json

            2025-07-27 17:34:50 UTC

            +

            deeplearning/dl-layer-selfattention-benchmark.json

            2025-07-27 17:54:34 UTC

            dl-layer-selfattention-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_LAYER_RMSNORM/Scalar0.001960.00196356,202
            diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-model-lenet-benchmark.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-model-lenet-benchmark.html similarity index 98% rename from site/benchmarks/2025-07-27/deeplearning/dl-model-lenet-benchmark.html rename to site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-model-lenet-benchmark.html index 96d31be3..2fa5e6a3 100644 --- a/site/benchmarks/2025-07-27/deeplearning/dl-model-lenet-benchmark.html +++ b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-model-lenet-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-model-lenet-benchmark.json

            2025-07-27 17:34:50 UTC

            +

            deeplearning/dl-model-lenet-benchmark.json

            2025-07-27 17:54:34 UTC

            dl-model-lenet-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_LAYER_ATTENTION/Scalar4.694.69149
            diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-model-mobilenetv3-benchmark.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-model-mobilenetv3-benchmark.html similarity index 98% rename from site/benchmarks/2025-07-27/deeplearning/dl-model-mobilenetv3-benchmark.html rename to site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-model-mobilenetv3-benchmark.html index 85465148..3ad148a6 100644 --- a/site/benchmarks/2025-07-27/deeplearning/dl-model-mobilenetv3-benchmark.html +++ b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-model-mobilenetv3-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-model-mobilenetv3-benchmark.json

            2025-07-27 17:34:50 UTC

            +

            deeplearning/dl-model-mobilenetv3-benchmark.json

            2025-07-27 17:54:34 UTC

            dl-model-mobilenetv3-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_MODEL_LENET/Auto_Vectorization0.1650.1654,304
            diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-model-resnet18-benchmark.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-model-resnet18-benchmark.html similarity index 98% rename from site/benchmarks/2025-07-27/deeplearning/dl-model-resnet18-benchmark.html rename to site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-model-resnet18-benchmark.html index ce2a0567..161db951 100644 --- a/site/benchmarks/2025-07-27/deeplearning/dl-model-resnet18-benchmark.html +++ b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-model-resnet18-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-model-resnet18-benchmark.json

            2025-07-27 17:34:50 UTC

            +

            deeplearning/dl-model-resnet18-benchmark.json

            2025-07-27 17:54:34 UTC

            dl-model-resnet18-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_MobileNet_V3/BM_MobileNet_V3_scalar37.137.119
            diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-model-tinyllama-benchmark.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-model-tinyllama-benchmark.html similarity index 98% rename from site/benchmarks/2025-07-27/deeplearning/dl-model-tinyllama-benchmark.html rename to site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-model-tinyllama-benchmark.html index 4b0a8a97..2627afe7 100644 --- a/site/benchmarks/2025-07-27/deeplearning/dl-model-tinyllama-benchmark.html +++ b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-model-tinyllama-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-model-tinyllama-benchmark.json

            2025-07-27 17:34:50 UTC

            +

            deeplearning/dl-model-tinyllama-benchmark.json

            2025-07-27 17:54:34 UTC

            dl-model-tinyllama-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_MODEL_Resnet18/Auto_Vectorization7317231
            diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-model-whisper-benchmark.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-model-whisper-benchmark.html similarity index 98% rename from site/benchmarks/2025-07-27/deeplearning/dl-model-whisper-benchmark.html rename to site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-model-whisper-benchmark.html index 2cab9076..371cd9bf 100644 --- a/site/benchmarks/2025-07-27/deeplearning/dl-model-whisper-benchmark.html +++ b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-model-whisper-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-model-whisper-benchmark.json

            2025-07-27 17:34:50 UTC

            +

            deeplearning/dl-model-whisper-benchmark.json

            2025-07-27 17:54:34 UTC

            dl-model-whisper-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_MODEL_TINYLLAMA/scalar1.39e+051.39e+051
            diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithaddf-benchmark.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-arithaddf-benchmark.html similarity index 98% rename from site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithaddf-benchmark.html rename to site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-arithaddf-benchmark.html index 274ab7a9..e6a48c8f 100644 --- a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithaddf-benchmark.html +++ b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-arithaddf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-arithaddf-benchmark.json

            2025-07-27 17:34:50 UTC

            +

            deeplearning/dl-op-linalg-arithaddf-benchmark.json

            2025-07-27 17:54:34 UTC

            dl-op-linalg-arithaddf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_MODEL_Whisper/Auto_Vectorization8e+048e+041
            diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithdivf-benchmark.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-arithdivf-benchmark.html similarity index 98% rename from site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithdivf-benchmark.html rename to site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-arithdivf-benchmark.html index 1478e86e..8f642d99 100644 --- a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithdivf-benchmark.html +++ b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-arithdivf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-arithdivf-benchmark.json

            2025-07-27 17:34:50 UTC

            +

            deeplearning/dl-op-linalg-arithdivf-benchmark.json

            2025-07-27 17:54:34 UTC

            dl-op-linalg-arithdivf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_ADDF_SCALAR0.02950.029523,451
            diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithmulf-benchmark.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-arithmulf-benchmark.html similarity index 98% rename from site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithmulf-benchmark.html rename to site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-arithmulf-benchmark.html index e39fcbab..b490a073 100644 --- a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithmulf-benchmark.html +++ b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-arithmulf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-arithmulf-benchmark.json

            2025-07-27 17:34:50 UTC

            +

            deeplearning/dl-op-linalg-arithmulf-benchmark.json

            2025-07-27 17:54:34 UTC

            dl-op-linalg-arithmulf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_DIVF_SCALAR0.02980.029823,358
            diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithnegf-benchmark.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-arithnegf-benchmark.html similarity index 98% rename from site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithnegf-benchmark.html rename to site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-arithnegf-benchmark.html index 9de6b58e..3ac73c79 100644 --- a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithnegf-benchmark.html +++ b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-arithnegf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-arithnegf-benchmark.json

            2025-07-27 17:34:50 UTC

            +

            deeplearning/dl-op-linalg-arithnegf-benchmark.json

            2025-07-27 17:54:34 UTC

            dl-op-linalg-arithnegf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_MULF_SCALAR0.02980.029823,441
            diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithsubf-benchmark.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-arithsubf-benchmark.html similarity index 98% rename from site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithsubf-benchmark.html rename to site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-arithsubf-benchmark.html index 69fc9ec5..955190f4 100644 --- a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-arithsubf-benchmark.html +++ b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-arithsubf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-arithsubf-benchmark.json

            2025-07-27 17:34:50 UTC

            +

            deeplearning/dl-op-linalg-arithsubf-benchmark.json

            2025-07-27 17:54:34 UTC

            dl-op-linalg-arithsubf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_NEGF_SCALAR0.02250.022530,969
            diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-batch-matmul-benchmark.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-batch-matmul-benchmark.html similarity index 98% rename from site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-batch-matmul-benchmark.html rename to site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-batch-matmul-benchmark.html index 11ecfcd3..7426c7e3 100644 --- a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-batch-matmul-benchmark.html +++ b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-batch-matmul-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-batch-matmul-benchmark.json

            2025-07-27 17:34:50 UTC

            +

            deeplearning/dl-op-linalg-batch-matmul-benchmark.json

            2025-07-27 17:54:34 UTC

            dl-op-linalg-batch-matmul-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_SUBF_SCALAR0.02940.029423,509
            diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html similarity index 97% rename from site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html rename to site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html index 0c22cbb9..f9329509 100644 --- a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html +++ b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.json

            2025-07-27 17:34:50 UTC

            +

            deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.json

            2025-07-27 17:54:34 UTC

            dl-op-linalg-conv2d-nchw-fchw-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_BATCH_MATMUL/Scalar/iterations:13.54e+033.54e+031
            diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html similarity index 98% rename from site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html rename to site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html index b9b948b6..6199f4e4 100644 --- a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html +++ b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json

            2025-07-27 17:34:50 UTC

            +

            deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json

            2025-07-27 17:54:34 UTC

            dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_Conv2DNchwFchw_SCALAR2832832
            diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html similarity index 97% rename from site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html rename to site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html index c116bae2..497cb50b 100644 --- a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html +++ b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json

            2025-07-27 17:34:50 UTC

            +

            deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json

            2025-07-27 17:54:34 UTC

            dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_CONV_2D_NHWC_FHWC/scalar/iterations:572.372.35
            diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html similarity index 97% rename from site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html rename to site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html index 01603307..b45bf170 100644 --- a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html +++ b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json

            2025-07-27 17:34:50 UTC

            +

            deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json

            2025-07-27 17:54:34 UTC

            dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_CONV_2D_NHWC_HWCF_SCALAR32.332.322
            diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-mathexp-benchmark.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-mathexp-benchmark.html similarity index 98% rename from site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-mathexp-benchmark.html rename to site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-mathexp-benchmark.html index 317ff811..b631432d 100644 --- a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-mathexp-benchmark.html +++ b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-mathexp-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-mathexp-benchmark.json

            2025-07-27 17:34:50 UTC

            +

            deeplearning/dl-op-linalg-mathexp-benchmark.json

            2025-07-27 17:54:34 UTC

            dl-op-linalg-mathexp-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/scalar/iterations:54.254.255
            diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-mathfpow-benchmark.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-mathfpow-benchmark.html similarity index 98% rename from site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-mathfpow-benchmark.html rename to site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-mathfpow-benchmark.html index 98c22f5c..cdd4dd3c 100644 --- a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-mathfpow-benchmark.html +++ b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-mathfpow-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-mathfpow-benchmark.json

            2025-07-27 17:34:50 UTC

            +

            deeplearning/dl-op-linalg-mathfpow-benchmark.json

            2025-07-27 17:54:34 UTC

            dl-op-linalg-mathfpow-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_EXP_SCALAR0.04560.045615,225
            diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html similarity index 98% rename from site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html rename to site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html index 6ca3052b..89a528a2 100644 --- a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html +++ b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-mathrsqrt-benchmark.json

            2025-07-27 17:34:50 UTC

            +

            deeplearning/dl-op-linalg-mathrsqrt-benchmark.json

            2025-07-27 17:54:34 UTC

            dl-op-linalg-mathrsqrt-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_FPOW_SCALAR0.08410.08418,255
            diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-matmul-benchmark.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-matmul-benchmark.html similarity index 98% rename from site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-matmul-benchmark.html rename to site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-matmul-benchmark.html index 38efb3f7..7b8b15e5 100644 --- a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-matmul-benchmark.html +++ b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-matmul-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-matmul-benchmark.json

            2025-07-27 17:34:50 UTC

            +

            deeplearning/dl-op-linalg-matmul-benchmark.json

            2025-07-27 17:54:34 UTC

            dl-op-linalg-matmul-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_RSQRT_SCALAR0.07280.07289,537
            diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html similarity index 97% rename from site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html rename to site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html index 3f34700d..4a1b820d 100644 --- a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html +++ b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.json

            2025-07-27 17:34:50 UTC

            +

            deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.json

            2025-07-27 17:54:34 UTC

            dl-op-linalg-pooling-nhwc-sum-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_MATMUL/scalar_O0/iterations:13.93e+033.93e+031
            diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-reduceaddf-benchmark.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-reduceaddf-benchmark.html similarity index 96% rename from site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-reduceaddf-benchmark.html rename to site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-reduceaddf-benchmark.html index 44c0f6a3..f07dc704 100644 --- a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-reduceaddf-benchmark.html +++ b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-reduceaddf-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-linalg-reduceaddf-benchmark.json

            2025-07-27 17:34:50 UTC

            +

            deeplearning/dl-op-linalg-reduceaddf-benchmark.json

            2025-07-27 17:54:34 UTC

            ⚠ FAILED: JSON parse error: Expecting value
            Console output
            2025-07-27T14:27:39+00:00
            diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-reducemaxf-benchmark.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-reducemaxf-benchmark.html
            similarity index 96%
            rename from site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-reducemaxf-benchmark.html
            rename to site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-reducemaxf-benchmark.html
            index 5c4d33d2..1a007354 100644
            --- a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-reducemaxf-benchmark.html
            +++ b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-reducemaxf-benchmark.html
            @@ -10,7 +10,7 @@
             .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem}
             
             
            -

            deeplearning/dl-op-linalg-reducemaxf-benchmark.json

            2025-07-27 17:34:50 UTC

            +

            deeplearning/dl-op-linalg-reducemaxf-benchmark.json

            2025-07-27 17:54:34 UTC

            ⚠ FAILED: JSON parse error: Expecting value
            Console output
            2025-07-27T14:27:39+00:00
            diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html
            similarity index 97%
            rename from site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html
            rename to site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html
            index 8b761020..71c06740 100644
            --- a/site/benchmarks/2025-07-27/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html
            +++ b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html
            @@ -10,7 +10,7 @@
             .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem}
             
             
            -

            deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.json

            2025-07-27 17:34:50 UTC

            +

            deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.json

            2025-07-27 17:54:34 UTC

            dl-op-linalg-softmax-exp-sum-div-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_POOLING_NHWC_SUM_SCALAR0.2330.2333,002
            diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-op-matmul-transpose-b-benchmark.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-matmul-transpose-b-benchmark.html similarity index 98% rename from site/benchmarks/2025-07-27/deeplearning/dl-op-matmul-transpose-b-benchmark.html rename to site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-matmul-transpose-b-benchmark.html index 7342f05e..c01788e3 100644 --- a/site/benchmarks/2025-07-27/deeplearning/dl-op-matmul-transpose-b-benchmark.html +++ b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-matmul-transpose-b-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-matmul-transpose-b-benchmark.json

            2025-07-27 17:34:50 UTC

            +

            deeplearning/dl-op-matmul-transpose-b-benchmark.json

            2025-07-27 17:54:34 UTC

            dl-op-matmul-transpose-b-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            BM_SOFTMAXEXPSUMDIV_SCALAR0.005660.00566121,646
            diff --git a/site/benchmarks/2025-07-27/deeplearning/dl-op-tosa-transpose-benchmark.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-tosa-transpose-benchmark.html similarity index 98% rename from site/benchmarks/2025-07-27/deeplearning/dl-op-tosa-transpose-benchmark.html rename to site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-tosa-transpose-benchmark.html index e0f178c0..267f6470 100644 --- a/site/benchmarks/2025-07-27/deeplearning/dl-op-tosa-transpose-benchmark.html +++ b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-tosa-transpose-benchmark.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            deeplearning/dl-op-tosa-transpose-benchmark.json

            2025-07-27 17:34:50 UTC

            +

            deeplearning/dl-op-tosa-transpose-benchmark.json

            2025-07-27 17:54:34 UTC

            dl-op-tosa-transpose-benchmark.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_MATMUL_TRANSPOSE_B/scalar_O0/iterations:51.05e+031.05e+035
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html similarity index 99% rename from site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html rename to site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 05643bac..01c24ea8 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:34:50 UTC

            +

            imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:54:34 UTC

            AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            DL_OPS_TRANSPOSE_2D/scalar_O0/iterations:526.421.45
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html similarity index 99% rename from site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html rename to site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 176dddb6..1b11967a 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:34:50 UTC

            +

            imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:54:34 UTC

            AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.884.88143
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html similarity index 99% rename from site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html rename to site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 5f64aa3d..35599864 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:34:50 UTC

            +

            imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:54:34 UTC

            AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.884.88143
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html similarity index 99% rename from site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html rename to site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 3b7bd82b..714bf9ca 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:34:50 UTC

            +

            imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:54:34 UTC

            AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.511.561
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html similarity index 99% rename from site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html rename to site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index cec715cb..eab7f8ec 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:34:50 UTC

            +

            imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:54:34 UTC

            AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.611.661
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html similarity index 99% rename from site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html rename to site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 644ec7dc..50abb917 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:34:50 UTC

            +

            imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:54:34 UTC

            AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.874.87144
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html similarity index 99% rename from site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html rename to site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 184ee9ec..3389cc83 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:34:50 UTC

            +

            imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:54:34 UTC

            AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.94.9144
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html similarity index 99% rename from site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html rename to site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index f8764c60..b2958986 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:34:50 UTC

            +

            imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:54:34 UTC

            AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.774.77147
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html similarity index 99% rename from site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html rename to site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 73530d2a..13bbced9 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:34:50 UTC

            +

            imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:54:34 UTC

            AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.914.91143
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html similarity index 99% rename from site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html rename to site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 90b72d8f..40dbd47e 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:34:50 UTC

            +

            imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:54:34 UTC

            AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.611.660
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html similarity index 99% rename from site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html rename to site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 1e369505..10faaad4 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:34:50 UTC

            +

            imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:54:34 UTC

            AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.511.560
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html similarity index 99% rename from site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html rename to site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 02422a27..e4137c5f 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:34:50 UTC

            +

            imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:54:34 UTC

            AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/121.521.531
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html similarity index 99% rename from site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html rename to site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 27effb56..2310ca2d 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:34:50 UTC

            +

            imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:54:34 UTC

            AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/121.621.632
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html similarity index 99% rename from site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html rename to site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 5f3ec226..1fe16fe8 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:34:50 UTC

            +

            imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:54:34 UTC

            AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/134.234.221
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html similarity index 99% rename from site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html rename to site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 474be48b..b53a109b 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:34:50 UTC

            +

            imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:54:34 UTC

            SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/134.334.320
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html similarity index 99% rename from site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html rename to site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 1fb9bbf6..00696aaa 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:34:50 UTC

            +

            imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:54:34 UTC

            SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.744.74148
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html similarity index 99% rename from site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html rename to site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 6b7333b2..01d84f2f 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:34:50 UTC

            +

            imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:54:34 UTC

            SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.884.88144
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html similarity index 99% rename from site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html rename to site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index e416de93..bb99d217 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:34:50 UTC

            +

            imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:54:34 UTC

            SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.511.560
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html similarity index 99% rename from site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html rename to site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index c7693c10..b51ee6f8 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:34:50 UTC

            +

            imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:54:34 UTC

            SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.611.661
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html similarity index 99% rename from site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html rename to site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 2cf54f46..9ff6cb52 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:34:50 UTC

            +

            imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:54:34 UTC

            SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/15.035.03139
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html similarity index 99% rename from site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html rename to site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index c50a4c77..d6181597 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:34:50 UTC

            +

            imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:54:34 UTC

            SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.894.89143
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html similarity index 99% rename from site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html rename to site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 43db2e50..412d449c 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:34:50 UTC

            +

            imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:54:34 UTC

            SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.894.89143
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html similarity index 99% rename from site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html rename to site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 4306f79a..905cc8e2 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:34:50 UTC

            +

            imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:54:34 UTC

            SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.874.87144
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html similarity index 99% rename from site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html rename to site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 8bd12d57..85d9b2ed 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:34:50 UTC

            +

            imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:54:34 UTC

            SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.511.561
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html similarity index 99% rename from site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html rename to site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index c9fae1e0..cf167376 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:34:50 UTC

            +

            imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:54:34 UTC

            SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.611.661
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html similarity index 99% rename from site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html rename to site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 79618f2f..9dc07381 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:34:50 UTC

            +

            imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:54:34 UTC

            SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/121.621.631
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html similarity index 99% rename from site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html rename to site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html index 513dfa5a..bdd3c9f0 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:34:50 UTC

            +

            imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:54:34 UTC

            SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/121.521.532
            diff --git a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html similarity index 99% rename from site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html rename to site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html index 53382fe3..e52e0fbd 100644 --- a/site/benchmarks/2025-07-27/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:34:50 UTC

            +

            imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:54:34 UTC

            SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/141.541.517
            diff --git a/site/benchmarks/2025-07-27/index.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/index.html similarity index 100% rename from site/benchmarks/2025-07-27/index.html rename to site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/index.html diff --git a/site/benchmarks/2025-07-27/vectorization/vectorization_matrix.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/vectorization/vectorization_matrix.html similarity index 98% rename from site/benchmarks/2025-07-27/vectorization/vectorization_matrix.html rename to site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/vectorization/vectorization_matrix.html index 0dc0ca3a..a4c1022e 100644 --- a/site/benchmarks/2025-07-27/vectorization/vectorization_matrix.html +++ b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/vectorization/vectorization_matrix.html @@ -10,7 +10,7 @@ .err{border:2px solid #c00;background:#fee;padding:1rem;border-radius:.5rem} -

            vectorization/vectorization_matrix.json

            2025-07-27 17:34:50 UTC

            +

            vectorization/vectorization_matrix.json

            2025-07-27 17:54:34 UTC

            vectorization_matrix.json

            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/134.234.220
            From 7625f845ac92b96f2b7fdf8829ca0b691a32415c Mon Sep 17 00:00:00 2001 From: Q Liu <52538137+LIUQyou@users.noreply.github.com> Date: Sun, 7 Sep 2025 14:46:48 +0200 Subject: [PATCH 41/52] update --- .github/workflows/bench.yml | 20 +++- .gitignore | 11 +++ .gitmodules | 4 + scripts/logs2html.py | 23 ++--- .../benchmarks/2025-07-27/index.html | 14 --- .../deeplearning/dl-layer-ffn-benchmark.html | 37 -------- .../dl-layer-rmsnorm-benchmark.html | 37 -------- .../dl-layer-selfattention-benchmark.html | 37 -------- .../dl-model-lenet-benchmark.html | 38 -------- .../dl-model-mobilenetv3-benchmark.html | 38 -------- .../dl-model-resnet18-benchmark.html | 37 -------- .../dl-model-tinyllama-benchmark.html | 39 -------- .../dl-model-whisper-benchmark.html | 38 -------- .../dl-op-linalg-arithaddf-benchmark.html | 38 -------- .../dl-op-linalg-arithdivf-benchmark.html | 38 -------- .../dl-op-linalg-arithmulf-benchmark.html | 38 -------- .../dl-op-linalg-arithnegf-benchmark.html | 38 -------- .../dl-op-linalg-arithsubf-benchmark.html | 38 -------- .../dl-op-linalg-batch-matmul-benchmark.html | 49 ---------- ...-op-linalg-conv2d-nchw-fchw-benchmark.html | 38 -------- ...-op-linalg-conv2d-nhwc-fhwc-benchmark.html | 42 -------- ...-op-linalg-conv2d-nhwc-hwcf-benchmark.html | 38 -------- ...-depthwise-conv-2d-nhwc-hwc-benchmark.html | 39 -------- .../dl-op-linalg-mathexp-benchmark.html | 38 -------- .../dl-op-linalg-mathfpow-benchmark.html | 38 -------- .../dl-op-linalg-mathrsqrt-benchmark.html | 38 -------- .../dl-op-linalg-matmul-benchmark.html | 44 --------- ...-op-linalg-pooling-nhwc-sum-benchmark.html | 38 -------- .../dl-op-linalg-reduceaddf-benchmark.html | 26 ----- .../dl-op-linalg-reducemaxf-benchmark.html | 26 ----- ...-linalg-softmax-exp-sum-div-benchmark.html | 38 -------- .../dl-op-matmul-transpose-b-benchmark.html | 42 -------- .../dl-op-tosa-transpose-benchmark.html | 36 ------- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 95 ------------------- 
...om3x3KernelAlignInt_REPLICATE_PADDING.html | 95 ------------------- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 95 ------------------- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 95 ------------------- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 95 ------------------- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 95 ------------------- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 95 ------------------- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 95 ------------------- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 95 ------------------- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 95 ------------------- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 95 ------------------- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 95 ------------------- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 95 ------------------- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 95 ------------------- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 95 ------------------- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 95 ------------------- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 95 ------------------- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 95 ------------------- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 95 ------------------- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 95 ------------------- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 95 ------------------- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 95 ------------------- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 95 ------------------- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 95 ------------------- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 95 ------------------- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 95 ------------------- ...dom3x3KernelAlignInt_CONSTANT_PADDING.html | 95 ------------------- ...om3x3KernelAlignInt_REPLICATE_PADDING.html | 95 ------------------- .../index.html | 14 --- .../vectorization/vectorization_matrix.html | 40 -------- 
site/benchmarks/latest/index.html | 1 - .../build_results_crosscompile_summary.log | 29 ------ .../deeplearning/dl-layer-ffn-benchmark.json | 16 ++-- .../deeplearning/dl-layer-ffn-benchmark.log | 8 +- .../dl-layer-rmsnorm-benchmark.json | 16 ++-- .../dl-layer-rmsnorm-benchmark.log | 8 +- .../dl-layer-selfattention-benchmark.json | 16 ++-- .../dl-layer-selfattention-benchmark.log | 8 +- .../dl-model-lenet-benchmark.json | 16 ++-- .../deeplearning/dl-model-lenet-benchmark.log | 10 +- .../dl-model-mobilenetv3-benchmark.json | 16 ++-- .../dl-model-mobilenetv3-benchmark.log | 8 +- .../dl-model-resnet18-benchmark.json | 12 +-- .../dl-model-resnet18-benchmark.log | 8 +- .../dl-model-tinyllama-benchmark.json | 16 ++-- .../dl-model-tinyllama-benchmark.log | 10 +- .../dl-model-whisper-benchmark.json | 12 +-- .../dl-model-whisper-benchmark.log | 12 +-- .../dl-op-linalg-arithaddf-benchmark.json | 16 ++-- .../dl-op-linalg-arithaddf-benchmark.log | 8 +- .../dl-op-linalg-arithdivf-benchmark.json | 16 ++-- .../dl-op-linalg-arithdivf-benchmark.log | 8 +- .../dl-op-linalg-arithmulf-benchmark.json | 16 ++-- .../dl-op-linalg-arithmulf-benchmark.log | 8 +- .../dl-op-linalg-arithnegf-benchmark.json | 16 ++-- .../dl-op-linalg-arithnegf-benchmark.log | 8 +- .../dl-op-linalg-arithsubf-benchmark.json | 16 ++-- .../dl-op-linalg-arithsubf-benchmark.log | 8 +- .../dl-op-linalg-batch-matmul-benchmark.json | 32 +++---- .../dl-op-linalg-batch-matmul-benchmark.log | 18 ++-- ...-op-linalg-conv2d-nchw-fchw-benchmark.json | 14 +-- ...l-op-linalg-conv2d-nchw-fchw-benchmark.log | 8 +- ...-op-linalg-conv2d-nhwc-fhwc-benchmark.json | 20 ++-- ...l-op-linalg-conv2d-nhwc-fhwc-benchmark.log | 10 +- ...-op-linalg-conv2d-nhwc-hwcf-benchmark.json | 16 ++-- ...l-op-linalg-conv2d-nhwc-hwcf-benchmark.log | 8 +- ...-depthwise-conv-2d-nhwc-hwc-benchmark.json | 16 ++-- ...g-depthwise-conv-2d-nhwc-hwc-benchmark.log | 10 +- .../dl-op-linalg-mathexp-benchmark.json | 16 ++-- .../dl-op-linalg-mathexp-benchmark.log | 8 +- 
.../dl-op-linalg-mathfpow-benchmark.json | 16 ++-- .../dl-op-linalg-mathfpow-benchmark.log | 8 +- .../dl-op-linalg-mathrsqrt-benchmark.json | 16 ++-- .../dl-op-linalg-mathrsqrt-benchmark.log | 8 +- .../dl-op-linalg-matmul-benchmark.json | 24 ++--- .../dl-op-linalg-matmul-benchmark.log | 14 +-- ...-op-linalg-pooling-nhwc-sum-benchmark.json | 16 ++-- ...l-op-linalg-pooling-nhwc-sum-benchmark.log | 8 +- .../dl-op-linalg-reduceaddf-benchmark.json | 4 +- .../dl-op-linalg-reduceaddf-benchmark.log | 4 +- .../dl-op-linalg-reducemaxf-benchmark.json | 4 +- .../dl-op-linalg-reducemaxf-benchmark.log | 4 +- ...-linalg-softmax-exp-sum-div-benchmark.json | 16 ++-- ...p-linalg-softmax-exp-sum-div-benchmark.log | 8 +- .../dl-op-matmul-transpose-b-benchmark.json | 20 ++-- .../dl-op-matmul-transpose-b-benchmark.log | 12 +-- .../dl-op-tosa-transpose-benchmark.json | 12 +-- .../dl-op-tosa-transpose-benchmark.log | 8 +- .../deeplearning/run_results_summary.log | 5 +- 122 files changed, 385 insertions(+), 4187 deletions(-) delete mode 100644 site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/benchmarks/2025-07-27/index.html delete mode 100644 site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-layer-ffn-benchmark.html delete mode 100644 site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-layer-rmsnorm-benchmark.html delete mode 100644 site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-layer-selfattention-benchmark.html delete mode 100644 site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-model-lenet-benchmark.html delete mode 100644 site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-model-mobilenetv3-benchmark.html delete mode 100644 site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-model-resnet18-benchmark.html delete mode 100644 
site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-model-tinyllama-benchmark.html delete mode 100644 site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-model-whisper-benchmark.html delete mode 100644 site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-arithaddf-benchmark.html delete mode 100644 site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-arithdivf-benchmark.html delete mode 100644 site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-arithmulf-benchmark.html delete mode 100644 site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-arithnegf-benchmark.html delete mode 100644 site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-arithsubf-benchmark.html delete mode 100644 site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-batch-matmul-benchmark.html delete mode 100644 site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html delete mode 100644 site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html delete mode 100644 site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html delete mode 100644 site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html delete mode 100644 site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-mathexp-benchmark.html delete mode 100644 site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-mathfpow-benchmark.html delete mode 100644 
site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html delete mode 100644 site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-matmul-benchmark.html delete mode 100644 site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html delete mode 100644 site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-reduceaddf-benchmark.html delete mode 100644 site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-reducemaxf-benchmark.html delete mode 100644 site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html delete mode 100644 site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-matmul-transpose-b-benchmark.html delete mode 100644 site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-tosa-transpose-benchmark.html delete mode 100644 site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html delete mode 100644 site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html delete mode 100644 site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html delete mode 100644 site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html delete mode 100644 site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html delete 
mode 100644 site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html delete mode 100644 site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html delete mode 100644 site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html delete mode 100644 site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html delete mode 100644 site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html delete mode 100644 site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html delete mode 100644 site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html delete mode 100644 site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html delete mode 100644 site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html delete mode 100644 site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html delete mode 100644 site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html delete mode 100644 
site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html delete mode 100644 site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html delete mode 100644 site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html delete mode 100644 site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html delete mode 100644 site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html delete mode 100644 site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html delete mode 100644 site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html delete mode 100644 site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html delete mode 100644 site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html delete mode 100644 site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html delete mode 100644 site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html delete mode 100644 
site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html delete mode 100644 site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/index.html delete mode 100644 site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/vectorization/vectorization_matrix.html delete mode 100644 site/benchmarks/latest/index.html diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml index 07f6ef11..5cf738ba 100755 --- a/.github/workflows/bench.yml +++ b/.github/workflows/bench.yml @@ -60,6 +60,22 @@ jobs: mkdir -p "${{ env.BENCH_DIR }}" python3 scripts/logs2html.py test_result "${{ env.BENCH_DIR }}/" + - name: Update benchmarks/latest redirect + working-directory: /home/quliu/buddy-complier-workspace/buddy-benchmark/site/benchmarks + run: | + set -e + # -------- pick the most recent dated folder (YYYY-MM-DD) ------------- + latest=$(ls -1d 20*/ | sort -r | head -n1 | tr -d '/') + echo "[Info] newest run is: $latest" + + # -------- rebuild the 'latest' folder with a meta-refresh ------------ + rm -rf latest + mkdir -p latest + cat > latest/index.html < + EOF + echo "[Info] benchmarks/latest now points to ../${latest}/" + # ------------------------------------------------------------ # 4) make /benchmarks/ point to the most recent run as well # ------------------------------------------------------------ @@ -92,7 +108,7 @@ jobs:
              {% for f in site.static_files %} {% if f.path contains page.dir and f.name != "index.html" and f.extname == ".html" %} -
            • {{ f.name }}
            • +
            • {{ f.name }}
            • {% endif %} {% endfor %}
            @@ -107,4 +123,4 @@ jobs: publish_dir: /home/quliu/buddy-complier-workspace/buddy-benchmark/site # <- root of generated site publish_branch: master keep_files: true # keep earlier runs - enable_jekyll: true \ No newline at end of file + enable_jekyll: true diff --git a/.gitignore b/.gitignore index bb52eca8..ef5a5a62 100755 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,14 @@ # vscode configurations /.vscode + +# Third-party checkouts and external repos +/thirdparty/ + +# Generated sites and results +/site/ +/test_result/ + +# Local Python/venv stuff +__pycache__/ +.venv/ diff --git a/.gitmodules b/.gitmodules index 2c8ef1d1..7585fff9 100755 --- a/.gitmodules +++ b/.gitmodules @@ -1,12 +1,16 @@ [submodule "thirdparty/opencv"] path = thirdparty/opencv url = https://github.com/opencv/opencv.git + ignore = all [submodule "thirdparty/Halide"] path = thirdparty/Halide url = https://github.com/halide/Halide.git + ignore = all [submodule "thirdparty/eigen"] path = thirdparty/eigen url = https://gitlab.com/libeigen/eigen.git + ignore = all [submodule "thirdparty/kfr"] path = thirdparty/kfr url = https://github.com/kfrlib/kfr.git + ignore = all diff --git a/scripts/logs2html.py b/scripts/logs2html.py index b9fc79be..a567930b 100755 --- a/scripts/logs2html.py +++ b/scripts/logs2html.py @@ -86,24 +86,15 @@ def gbench_json_to_table(js_path: pathlib.Path) -> str: page.write_text("\n".join(body)) # --------------------------------------------------------------------------- -# ❶ choose a dated sub-folder for this run -# --------------------------------------------------------------------------- -run_dir = dst / "benchmarks" / datetime.date.today().isoformat() -run_dir.mkdir(parents=True, exist_ok=True) - -# --------------------------------------------------------------------------- -# ❷ collect links to every HTML page we just generated +# Build a simple index in the destination root (dst) # --------------------------------------------------------------------------- 
links = "\n".join( - f'
          • ' - f'{p.relative_to(run_dir).as_posix()}
          • ' - for p in sorted(run_dir.rglob("*.html")) # inside today's folder - if p.name != "index.html" # skip the index itself + f'
          • ' + f'{p.relative_to(dst).as_posix()}
          • ' + for p in sorted(dst.rglob("*.html")) + if p.name != "index.html" ) -# --------------------------------------------------------------------------- -# ❸ write (or overwrite) today’s index page -# --------------------------------------------------------------------------- -(run_dir / "index.html").write_text( +(dst / "index.html").write_text( CSS + f"

            Buddy-Benchmark results

              \n{links}\n
            " -) \ No newline at end of file +) diff --git a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/benchmarks/2025-07-27/index.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/benchmarks/2025-07-27/index.html deleted file mode 100644 index 1641d47e..00000000 --- a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/benchmarks/2025-07-27/index.html +++ /dev/null @@ -1,14 +0,0 @@ - - -

            Buddy-Benchmark results

              - -
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-layer-ffn-benchmark.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-layer-ffn-benchmark.html deleted file mode 100644 index 2a3eee65..00000000 --- a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-layer-ffn-benchmark.html +++ /dev/null @@ -1,37 +0,0 @@ - - - -

            deeplearning/dl-layer-ffn-benchmark.json

            2025-07-27 17:54:34 UTC

            -

            dl-layer-ffn-benchmark.json

            -
            NameTime (ns)CPU (ns)Iterations
            MLIR_MatMul/118.818.837,302,822
            - -
            NameTime (ms)CPU (ms)Iterations
            DL_LAYER_FFN/Scalar0.06540.065410,762
            DL_LAYER_FFN/Auto_Vectorization0.02710.027125,673
            -
            Console output -
            2025-07-27T14:26:49+00:00
            -Running ./dl-layer-ffn-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.04, 1.19, 1.31
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ---------------------------------------------------------------------------
            -Benchmark                                Time             CPU   Iterations
            ---------------------------------------------------------------------------
            -DL_LAYER_FFN/Scalar                  0.065 ms        0.065 ms        10762
            -DL_LAYER_FFN/Auto_Vectorization      0.027 ms        0.027 ms        25673
            ------------------------------------------------------------
            -Correctness Verification: PASS
            ------------------------------------------------------------
            -
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-layer-rmsnorm-benchmark.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-layer-rmsnorm-benchmark.html deleted file mode 100644 index 00796040..00000000 --- a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-layer-rmsnorm-benchmark.html +++ /dev/null @@ -1,37 +0,0 @@ - - - -

            deeplearning/dl-layer-rmsnorm-benchmark.json

            2025-07-27 17:54:34 UTC

            -

            dl-layer-rmsnorm-benchmark.json

            - - -
            NameTime (ms)CPU (ms)Iterations
            DL_LAYER_RMSNORM/Scalar0.001960.00196356,202
            DL_LAYER_RMSNORM/Auto_Vectorization0.0009150.000915751,546
            -
            Console output -
            2025-07-27T14:26:53+00:00
            -Running ./dl-layer-rmsnorm-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.03, 1.19, 1.30
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            -------------------------------------------------------------------------------
            -Benchmark                                    Time             CPU   Iterations
            -------------------------------------------------------------------------------
            -DL_LAYER_RMSNORM/Scalar                  0.002 ms        0.002 ms       356202
            -DL_LAYER_RMSNORM/Auto_Vectorization      0.001 ms        0.001 ms       751546
            ------------------------------------------------------------
            -Correctness Verification: PASS
            ------------------------------------------------------------
            -
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-layer-selfattention-benchmark.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-layer-selfattention-benchmark.html deleted file mode 100644 index bd2db1af..00000000 --- a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-layer-selfattention-benchmark.html +++ /dev/null @@ -1,37 +0,0 @@ - - - -

            deeplearning/dl-layer-selfattention-benchmark.json

            2025-07-27 17:54:34 UTC

            -

            dl-layer-selfattention-benchmark.json

            - - -
            NameTime (ms)CPU (ms)Iterations
            DL_LAYER_ATTENTION/Scalar4.694.69149
            DL_LAYER_ATTENTION/Auto_Vectorization1.571.57446
            -
            Console output -
            2025-07-27T14:26:51+00:00
            -Running ./dl-layer-selfattention-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.04, 1.19, 1.31
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ---------------------------------------------------------------------------------
            -Benchmark                                      Time             CPU   Iterations
            ---------------------------------------------------------------------------------
            -DL_LAYER_ATTENTION/Scalar                   4.69 ms         4.69 ms          149
            -DL_LAYER_ATTENTION/Auto_Vectorization       1.57 ms         1.57 ms          446
            ------------------------------------------------------------
            -Correctness Verification: PASS
            ------------------------------------------------------------
            -
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-model-lenet-benchmark.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-model-lenet-benchmark.html deleted file mode 100644 index 2fa5e6a3..00000000 --- a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-model-lenet-benchmark.html +++ /dev/null @@ -1,38 +0,0 @@ - - - -

            deeplearning/dl-model-lenet-benchmark.json

            2025-07-27 17:54:34 UTC

            -

            dl-model-lenet-benchmark.json

            - - -
            NameTime (ms)CPU (ms)Iterations
            DL_MODEL_LENET/Auto_Vectorization0.1650.1654,304
            DL_MODEL_LENET/Buddy_Vectorization0.1370.1375,022
            -
            Console output -
            2025-07-27T14:22:52+00:00
            -Running ./dl-model-lenet-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.40, 1.39, 1.40
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ------------------------------------------------------------------------------
            -Benchmark                                   Time             CPU   Iterations
            ------------------------------------------------------------------------------
            -DL_MODEL_LENET/Auto_Vectorization       0.165 ms        0.165 ms         4304
            -DL_MODEL_LENET/Buddy_Vectorization      0.137 ms        0.137 ms         5022
            ------------------------------------------------------------
            -Correctness Verification:
            -Transform case: FAIL
            ------------------------------------------------------------
            -
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-model-mobilenetv3-benchmark.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-model-mobilenetv3-benchmark.html deleted file mode 100644 index 3ad148a6..00000000 --- a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-model-mobilenetv3-benchmark.html +++ /dev/null @@ -1,38 +0,0 @@ - - - -

            deeplearning/dl-model-mobilenetv3-benchmark.json

            2025-07-27 17:54:34 UTC

            -

            dl-model-mobilenetv3-benchmark.json

            - - -
            NameTime (ms)CPU (ms)Iterations
            BM_MobileNet_V3/BM_MobileNet_V3_scalar37.137.119
            BM_MobileNet_V3/BM_MobileNet_V3_conv_opt333321
            -
            Console output -
            2025-07-27T14:22:49+00:00
            -Running ./dl-model-mobilenetv3-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.40, 1.39, 1.40
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ------------------------------------------------------------------------------------
            -Benchmark                                         Time             CPU   Iterations
            ------------------------------------------------------------------------------------
            -BM_MobileNet_V3/BM_MobileNet_V3_scalar         37.1 ms         37.1 ms           19
            -BM_MobileNet_V3/BM_MobileNet_V3_conv_opt       33.0 ms         33.0 ms           21
            ------------------------------------------------------------
            -Correctness Verification:
            -Transform case: PASS
            ------------------------------------------------------------
            -
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-model-resnet18-benchmark.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-model-resnet18-benchmark.html deleted file mode 100644 index 161db951..00000000 --- a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-model-resnet18-benchmark.html +++ /dev/null @@ -1,37 +0,0 @@ - - - -

            deeplearning/dl-model-resnet18-benchmark.json

            2025-07-27 17:54:34 UTC

            -

            dl-model-resnet18-benchmark.json

            - - -
            NameTime (ms)CPU (ms)Iterations
            DL_MODEL_Resnet18/Auto_Vectorization7317231
            DL_MODEL_Resnet18/Buddy_Vectorization7297221
            -
            Console output -
            2025-07-27T14:26:46+00:00
            -Running ./dl-model-resnet18-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.04, 1.19, 1.31
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ---------------------------------------------------------------------------------
            -Benchmark                                      Time             CPU   Iterations
            ---------------------------------------------------------------------------------
            -DL_MODEL_Resnet18/Auto_Vectorization         731 ms          723 ms            1
            -DL_MODEL_Resnet18/Buddy_Vectorization        729 ms          722 ms            1
            ------------------------------------------------------------
            -Correctness Verification: PASS
            ------------------------------------------------------------
            -
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-model-tinyllama-benchmark.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-model-tinyllama-benchmark.html deleted file mode 100644 index 2627afe7..00000000 --- a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-model-tinyllama-benchmark.html +++ /dev/null @@ -1,39 +0,0 @@ - - - -

            deeplearning/dl-model-tinyllama-benchmark.json

            2025-07-27 17:54:34 UTC

            -

            dl-model-tinyllama-benchmark.json

            - - - -
            NameTime (ms)CPU (ms)Iterations
            DL_MODEL_TINYLLAMA/scalar1.39e+051.39e+051
            DL_MODEL_TINYLLAMA/matmul_opt1e+041e+041
            DL_MODEL_TINYLLAMA/matmul_opt_omp7.84e+037.2e+031
            -
            Console output -
            2025-07-27T14:17:33+00:00
            -Running ./dl-model-tinyllama-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.70, 1.92, 1.54
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            -----------------------------------------------------------------------------
            -Benchmark                                  Time             CPU   Iterations
            -----------------------------------------------------------------------------
            -DL_MODEL_TINYLLAMA/scalar             139185 ms       139179 ms            1
            -DL_MODEL_TINYLLAMA/matmul_opt          10038 ms        10038 ms            1
            -DL_MODEL_TINYLLAMA/matmul_opt_omp       7836 ms         7201 ms            1
            ----------- Verification ----------
            -matmul_opt PASS
            -matmul_opt_omp PASS
            -
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-model-whisper-benchmark.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-model-whisper-benchmark.html deleted file mode 100644 index 371cd9bf..00000000 --- a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-model-whisper-benchmark.html +++ /dev/null @@ -1,38 +0,0 @@ - - - -

            deeplearning/dl-model-whisper-benchmark.json

            2025-07-27 17:54:34 UTC

            -

            dl-model-whisper-benchmark.json

            - - -
            NameTime (ms)CPU (ms)Iterations
            DL_MODEL_Whisper/Auto_Vectorization8e+048e+041
            DL_MODEL_Whisper/Buddy_Vectorization3.67e+043.67e+041
            -
            Console output -
            2025-07-27T14:22:54+00:00
            -Running ./dl-model-whisper-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.45, 1.40, 1.40
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            --------------------------------------------------------------------------------
            -Benchmark                                     Time             CPU   Iterations
            --------------------------------------------------------------------------------
            -DL_MODEL_Whisper/Auto_Vectorization       79983 ms        79980 ms            1
            -DL_MODEL_Whisper/Buddy_Vectorization      36713 ms        36700 ms            1
            ------------------------------------------------------------
            -Correctness Verification for Output1: PASS
            -Correctness Verification for Output2: FAIL
            ------------------------------------------------------------
            -
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-arithaddf-benchmark.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-arithaddf-benchmark.html deleted file mode 100644 index e6a48c8f..00000000 --- a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-arithaddf-benchmark.html +++ /dev/null @@ -1,38 +0,0 @@ - - - -

            deeplearning/dl-op-linalg-arithaddf-benchmark.json

            2025-07-27 17:54:34 UTC

            -

            dl-op-linalg-arithaddf-benchmark.json

            - - -
            NameTime (ms)CPU (ms)Iterations
            BM_ADDF_SCALAR0.02950.029523,451
            BM_ADDF_AutoVectorization0.0040.004174,931
            -
            Console output -
            2025-07-27T14:27:23+00:00
            -Running ./dl-op-linalg-arithaddf-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.07, 1.18, 1.30
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ---------------------------------------------------------------------
            -Benchmark                          Time             CPU   Iterations
            ---------------------------------------------------------------------
            -BM_ADDF_SCALAR                 0.030 ms        0.030 ms        23451
            -BM_ADDF_AutoVectorization      0.004 ms        0.004 ms       174931
            ------------------------------------------------------------
            -Correctness Verification:
            -Transform case: PASS
            ------------------------------------------------------------
            -
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-arithdivf-benchmark.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-arithdivf-benchmark.html deleted file mode 100644 index 8f642d99..00000000 --- a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-arithdivf-benchmark.html +++ /dev/null @@ -1,38 +0,0 @@ - - - -

            deeplearning/dl-op-linalg-arithdivf-benchmark.json

            2025-07-27 17:54:34 UTC

            -

            dl-op-linalg-arithdivf-benchmark.json

            - - -
            NameTime (ms)CPU (ms)Iterations
            BM_DIVF_SCALAR0.02980.029823,358
            BM_DIVF_AutoVectorization0.009490.0094967,517
            -
            Console output -
            2025-07-27T14:27:25+00:00
            -Running ./dl-op-linalg-arithdivf-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.07, 1.18, 1.30
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ---------------------------------------------------------------------
            -Benchmark                          Time             CPU   Iterations
            ---------------------------------------------------------------------
            -BM_DIVF_SCALAR                 0.030 ms        0.030 ms        23358
            -BM_DIVF_AutoVectorization      0.009 ms        0.009 ms        67517
            ------------------------------------------------------------
            -Correctness Verification:
            -Transform case: PASS
            ------------------------------------------------------------
            -
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-arithmulf-benchmark.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-arithmulf-benchmark.html deleted file mode 100644 index b490a073..00000000 --- a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-arithmulf-benchmark.html +++ /dev/null @@ -1,38 +0,0 @@ - - - -

            deeplearning/dl-op-linalg-arithmulf-benchmark.json

            2025-07-27 17:54:34 UTC

            -

            dl-op-linalg-arithmulf-benchmark.json

            - - -
            NameTime (ms)CPU (ms)Iterations
            BM_MULF_SCALAR0.02980.029823,441
            BM_MULF_AutoVectorization0.0040.004175,263
            -
            Console output -
            2025-07-27T14:27:27+00:00
            -Running ./dl-op-linalg-arithmulf-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.07, 1.18, 1.30
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ---------------------------------------------------------------------
            -Benchmark                          Time             CPU   Iterations
            ---------------------------------------------------------------------
            -BM_MULF_SCALAR                 0.030 ms        0.030 ms        23441
            -BM_MULF_AutoVectorization      0.004 ms        0.004 ms       175263
            ------------------------------------------------------------
            -Correctness Verification:
            -Transform case: PASS
            ------------------------------------------------------------
            -
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-arithnegf-benchmark.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-arithnegf-benchmark.html deleted file mode 100644 index 3ac73c79..00000000 --- a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-arithnegf-benchmark.html +++ /dev/null @@ -1,38 +0,0 @@ - - - -

            deeplearning/dl-op-linalg-arithnegf-benchmark.json

            2025-07-27 17:54:34 UTC

            -

            dl-op-linalg-arithnegf-benchmark.json

            - - -
            NameTime (ms)CPU (ms)Iterations
            BM_NEGF_SCALAR0.02250.022530,969
            BM_NEGF_AutoVectorization0.002460.00246277,205
            -
            Console output -
            2025-07-27T14:27:29+00:00
            -Running ./dl-op-linalg-arithnegf-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.07, 1.18, 1.30
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ---------------------------------------------------------------------
            -Benchmark                          Time             CPU   Iterations
            ---------------------------------------------------------------------
            -BM_NEGF_SCALAR                 0.023 ms        0.023 ms        30969
            -BM_NEGF_AutoVectorization      0.002 ms        0.002 ms       277205
            ------------------------------------------------------------
            -Correctness Verification:
            -Transform case: PASS
            ------------------------------------------------------------
            -
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-arithsubf-benchmark.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-arithsubf-benchmark.html deleted file mode 100644 index 955190f4..00000000 --- a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-arithsubf-benchmark.html +++ /dev/null @@ -1,38 +0,0 @@ - - - -

            deeplearning/dl-op-linalg-arithsubf-benchmark.json

            2025-07-27 17:54:34 UTC

            -

            dl-op-linalg-arithsubf-benchmark.json

            - - -
            NameTime (ms)CPU (ms)Iterations
            BM_SUBF_SCALAR0.02940.029423,509
            BM_SUBF_AutoVectorization0.003990.00399175,223
            -
            Console output -
            2025-07-27T14:27:31+00:00
            -Running ./dl-op-linalg-arithsubf-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.07, 1.18, 1.30
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ---------------------------------------------------------------------
            -Benchmark                          Time             CPU   Iterations
            ---------------------------------------------------------------------
            -BM_SUBF_SCALAR                 0.029 ms        0.029 ms        23509
            -BM_SUBF_AutoVectorization      0.004 ms        0.004 ms       175223
            ------------------------------------------------------------
            -Correctness Verification:
            -Transform case: PASS
            ------------------------------------------------------------
            -
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-batch-matmul-benchmark.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-batch-matmul-benchmark.html deleted file mode 100644 index 7426c7e3..00000000 --- a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-batch-matmul-benchmark.html +++ /dev/null @@ -1,49 +0,0 @@ - - - -

            deeplearning/dl-op-linalg-batch-matmul-benchmark.json

            2025-07-27 17:54:34 UTC

            -

            dl-op-linalg-batch-matmul-benchmark.json

            - - - - - - - -
            NameTime (ms)CPU (ms)Iterations
            DL_OPS_BATCH_MATMUL/Scalar/iterations:13.54e+033.54e+031
            DL_OPS_BATCH_MATMUL/AutoVectorization/iterations:19769761
            DL_OPS_BATCH_MATMUL/Vectorization/iterations:11951951
            DL_OPS_BATCH_MATMUL/Tile/iterations:11091091
            DL_OPS_BATCH_MATMUL/SCF/iterations:11181181
            DL_OPS_BATCH_MATMUL/BROADCAST/iterations:13563561
            DL_OPS_BATCH_MATMUL/BROADCAST_OMP/iterations:16232.11
            -
            Console output -
            2025-07-27T14:27:13+00:00
            -Running ./dl-op-linalg-batch-matmul-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.08, 1.19, 1.30
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ----------------------------------------------------------------------------------------------
            -Benchmark                                                   Time             CPU   Iterations
            ----------------------------------------------------------------------------------------------
            -DL_OPS_BATCH_MATMUL/Scalar/iterations:1                  3536 ms         3536 ms            1
            -DL_OPS_BATCH_MATMUL/AutoVectorization/iterations:1        976 ms          976 ms            1
            -DL_OPS_BATCH_MATMUL/Vectorization/iterations:1            195 ms          195 ms            1
            -DL_OPS_BATCH_MATMUL/Tile/iterations:1                     109 ms          109 ms            1
            -DL_OPS_BATCH_MATMUL/SCF/iterations:1                      118 ms          118 ms            1
            -DL_OPS_BATCH_MATMUL/BROADCAST/iterations:1                356 ms          356 ms            1
            -DL_OPS_BATCH_MATMUL/BROADCAST_OMP/iterations:1           62.0 ms         32.1 ms            1
            ----------- Verification ----------
            -Tile PASS
            -SCF PASS
            -BROADCAST PASS
            -BROADCAST_OMP PASS
            -
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html deleted file mode 100644 index f9329509..00000000 --- a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.html +++ /dev/null @@ -1,38 +0,0 @@ - - - -

            deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.json

            2025-07-27 17:54:34 UTC

            -

            dl-op-linalg-conv2d-nchw-fchw-benchmark.json

            - - -
            NameTime (ms)CPU (ms)Iterations
            BM_Conv2DNchwFchw_SCALAR2832832
            BM_Conv2DNchwFchw_Im2col6.86.8101
            -
            Console output -
            2025-07-27T14:27:06+00:00
            -Running ./dl-op-linalg-conv2d-nchw-fchw-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.10, 1.20, 1.31
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            --------------------------------------------------------------------
            -Benchmark                         Time             CPU   Iterations
            --------------------------------------------------------------------
            -BM_Conv2DNchwFchw_SCALAR        283 ms          283 ms            2
            -BM_Conv2DNchwFchw_Im2col       6.80 ms         6.80 ms          101
            ------------------------------------------------------------
            -Correctness Verification:
            -Transform case: PASS
            ------------------------------------------------------------
            -
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html deleted file mode 100644 index 6199f4e4..00000000 --- a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.html +++ /dev/null @@ -1,42 +0,0 @@ - - - -

            deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json

            2025-07-27 17:54:34 UTC

            -

            dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json

            - - - - -
            NameTime (ms)CPU (ms)Iterations
            DL_OPS_CONV_2D_NHWC_FHWC/scalar/iterations:572.372.35
            DL_OPS_CONV_2D_NHWC_FHWC/auto_vectorization/iterations:59.359.355
            DL_OPS_CONV_2D_NHWC_FHWC/vectorization/iterations:51.821.825
            DL_OPS_CONV_2D_NHWC_FHWC/vec_tile/iterations:51.821.825
            -
            Console output -
            2025-07-27T14:27:10+00:00
            -Running ./dl-op-linalg-conv2d-nhwc-fhwc-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.09, 1.19, 1.30
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ----------------------------------------------------------------------------------------------------
            -Benchmark                                                         Time             CPU   Iterations
            ----------------------------------------------------------------------------------------------------
            -DL_OPS_CONV_2D_NHWC_FHWC/scalar/iterations:5                   72.3 ms         72.3 ms            5
            -DL_OPS_CONV_2D_NHWC_FHWC/auto_vectorization/iterations:5       9.35 ms         9.35 ms            5
            -DL_OPS_CONV_2D_NHWC_FHWC/vectorization/iterations:5            1.82 ms         1.82 ms            5
            -DL_OPS_CONV_2D_NHWC_FHWC/vec_tile/iterations:5                 1.82 ms         1.82 ms            5
            ----------- Verification ----------
            -auto_vectorization PASS
            -vectorization PASS
            -vec_tile PASS
            -
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html deleted file mode 100644 index 497cb50b..00000000 --- a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.html +++ /dev/null @@ -1,38 +0,0 @@ - - - -

            deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json

            2025-07-27 17:54:34 UTC

            -

            dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json

            - - -
            NameTime (ms)CPU (ms)Iterations
            BM_CONV_2D_NHWC_HWCF_SCALAR32.332.322
            BM_CONV_2D_NHWC_HWCF_AutoVectorization6.136.13113
            -
            Console output -
            2025-07-27T14:27:08+00:00
            -Running ./dl-op-linalg-conv2d-nhwc-hwcf-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.09, 1.19, 1.30
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ----------------------------------------------------------------------------------
            -Benchmark                                       Time             CPU   Iterations
            ----------------------------------------------------------------------------------
            -BM_CONV_2D_NHWC_HWCF_SCALAR                  32.3 ms         32.3 ms           22
            -BM_CONV_2D_NHWC_HWCF_AutoVectorization       6.13 ms         6.13 ms          113
            ------------------------------------------------------------
            -Correctness Verification:
            -Transform case: PASS
            ------------------------------------------------------------
            -
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html deleted file mode 100644 index b45bf170..00000000 --- a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.html +++ /dev/null @@ -1,39 +0,0 @@ - - - -

            deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json

            2025-07-27 17:54:34 UTC

            -

            dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json

            - - - -
            NameTime (ms)CPU (ms)Iterations
            DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/scalar/iterations:54.254.255
            DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/auto_vectorization/iterations:51.711.715
            DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/vectorization/iterations:50.1250.1255
            -
            Console output -
            2025-07-27T14:27:11+00:00
            -Running ./dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.09, 1.19, 1.30
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            -------------------------------------------------------------------------------------------------------------
            -Benchmark                                                                  Time             CPU   Iterations
            -------------------------------------------------------------------------------------------------------------
            -DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/scalar/iterations:5                   4.25 ms         4.25 ms            5
            -DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/auto_vectorization/iterations:5       1.71 ms         1.71 ms            5
            -DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/vectorization/iterations:5           0.125 ms        0.125 ms            5
            ----------- Verification ----------
            -auto_vectorization PASS
            -vectorization PASS
            -
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-mathexp-benchmark.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-mathexp-benchmark.html deleted file mode 100644 index b631432d..00000000 --- a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-mathexp-benchmark.html +++ /dev/null @@ -1,38 +0,0 @@ - - - -

            deeplearning/dl-op-linalg-mathexp-benchmark.json

            2025-07-27 17:54:34 UTC

            -

            dl-op-linalg-mathexp-benchmark.json

            - - -
            NameTime (ms)CPU (ms)Iterations
            BM_EXP_SCALAR0.04560.045615,225
            BM_EXP_AutoVectorization0.03160.031622,248
            -
            Console output -
            2025-07-27T14:27:37+00:00
            -Running ./dl-op-linalg-mathexp-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.06, 1.18, 1.29
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            --------------------------------------------------------------------
            -Benchmark                         Time             CPU   Iterations
            --------------------------------------------------------------------
            -BM_EXP_SCALAR                 0.046 ms        0.046 ms        15225
            -BM_EXP_AutoVectorization      0.032 ms        0.032 ms        22248
            ------------------------------------------------------------
            -Correctness Verification:
            -Transform case: PASS
            ------------------------------------------------------------
            -
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-mathfpow-benchmark.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-mathfpow-benchmark.html deleted file mode 100644 index cdd4dd3c..00000000 --- a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-mathfpow-benchmark.html +++ /dev/null @@ -1,38 +0,0 @@ - - - -

            deeplearning/dl-op-linalg-mathfpow-benchmark.json

            2025-07-27 17:54:34 UTC

            -

            dl-op-linalg-mathfpow-benchmark.json

            - - -
            NameTime (ms)CPU (ms)Iterations
            BM_FPOW_SCALAR0.08410.08418,255
            BM_FPOW_AutoVectorization0.05690.056912,305
            -
            Console output -
            2025-07-27T14:27:33+00:00
            -Running ./dl-op-linalg-mathfpow-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.06, 1.18, 1.29
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ---------------------------------------------------------------------
            -Benchmark                          Time             CPU   Iterations
            ---------------------------------------------------------------------
            -BM_FPOW_SCALAR                 0.084 ms        0.084 ms         8255
            -BM_FPOW_AutoVectorization      0.057 ms        0.057 ms        12305
            ------------------------------------------------------------
            -Correctness Verification:
            -Transform case: PASS
            ------------------------------------------------------------
            -
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html deleted file mode 100644 index 89a528a2..00000000 --- a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-mathrsqrt-benchmark.html +++ /dev/null @@ -1,38 +0,0 @@ - - - -

            deeplearning/dl-op-linalg-mathrsqrt-benchmark.json

            2025-07-27 17:54:34 UTC

            -

            dl-op-linalg-mathrsqrt-benchmark.json

            - - -
            NameTime (ms)CPU (ms)Iterations
            BM_RSQRT_SCALAR0.07280.07289,537
            BM_RSQRT_AutoVectorization0.004350.00435160,927
            -
            Console output -
            2025-07-27T14:27:35+00:00
            -Running ./dl-op-linalg-mathrsqrt-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.06, 1.18, 1.29
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ----------------------------------------------------------------------
            -Benchmark                           Time             CPU   Iterations
            ----------------------------------------------------------------------
            -BM_RSQRT_SCALAR                 0.073 ms        0.073 ms         9537
            -BM_RSQRT_AutoVectorization      0.004 ms        0.004 ms       160927
            ------------------------------------------------------------
            -Correctness Verification:
            -Transform case: PASS
            ------------------------------------------------------------
            -
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-matmul-benchmark.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-matmul-benchmark.html deleted file mode 100644 index 7b8b15e5..00000000 --- a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-matmul-benchmark.html +++ /dev/null @@ -1,44 +0,0 @@ - - - -

            deeplearning/dl-op-linalg-matmul-benchmark.json

            2025-07-27 17:54:34 UTC

            -

            dl-op-linalg-matmul-benchmark.json

            - - - - - -
            NameTime (ms)CPU (ms)Iterations
            DL_OPS_MATMUL/scalar_O0/iterations:13.93e+033.93e+031
            DL_OPS_MATMUL/scalar_O3/iterations:13.21e+033.21e+031
            DL_OPS_MATMUL/tile/iterations:11171171
            DL_OPS_MATMUL/vec/iterations:159.959.91
            DL_OPS_MATMUL/vec_omp/iterations:1229.111
            -
            Console output -
            2025-07-27T14:26:55+00:00
            -Running ./dl-op-linalg-matmul-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.03, 1.19, 1.30
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            --------------------------------------------------------------------------------
            -Benchmark                                     Time             CPU   Iterations
            --------------------------------------------------------------------------------
            -DL_OPS_MATMUL/scalar_O0/iterations:1       3933 ms         3933 ms            1
            -DL_OPS_MATMUL/scalar_O3/iterations:1       3212 ms         3212 ms            1
            -DL_OPS_MATMUL/tile/iterations:1             117 ms          117 ms            1
            -DL_OPS_MATMUL/vec/iterations:1             59.9 ms         59.9 ms            1
            -DL_OPS_MATMUL/vec_omp/iterations:1         22.0 ms         9.11 ms            1
            ----------- Verification ----------
            -tile PASS
            -vec PASS
            -vec_omp PASS
            -
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html deleted file mode 100644 index 4a1b820d..00000000 --- a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.html +++ /dev/null @@ -1,38 +0,0 @@ - - - -

            deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.json

            2025-07-27 17:54:34 UTC

            -

            dl-op-linalg-pooling-nhwc-sum-benchmark.json

            - - -
            NameTime (ms)CPU (ms)Iterations
            BM_POOLING_NHWC_SUM_SCALAR0.2330.2333,002
            BM_POOLING_NHWC_SUM_AutoVectorization0.04140.041416,950
            -
            Console output -
            2025-07-27T14:27:11+00:00
            -Running ./dl-op-linalg-pooling-nhwc-sum-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.09, 1.19, 1.30
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ---------------------------------------------------------------------------------
            -Benchmark                                      Time             CPU   Iterations
            ---------------------------------------------------------------------------------
            -BM_POOLING_NHWC_SUM_SCALAR                 0.233 ms        0.233 ms         3002
            -BM_POOLING_NHWC_SUM_AutoVectorization      0.041 ms        0.041 ms        16950
            ------------------------------------------------------------
            -Correctness Verification:
            -Transform case: PASS
            ------------------------------------------------------------
            -
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-reduceaddf-benchmark.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-reduceaddf-benchmark.html deleted file mode 100644 index f07dc704..00000000 --- a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-reduceaddf-benchmark.html +++ /dev/null @@ -1,26 +0,0 @@ - - - -

            deeplearning/dl-op-linalg-reduceaddf-benchmark.json

            2025-07-27 17:54:34 UTC

            -
            ⚠ FAILED: JSON parse error: Expecting value
            -
            Console output -
            2025-07-27T14:27:39+00:00
            -Running ./dl-op-linalg-reduceaddf-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.06, 1.17, 1.29
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            -
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-reducemaxf-benchmark.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-reducemaxf-benchmark.html deleted file mode 100644 index 1a007354..00000000 --- a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-reducemaxf-benchmark.html +++ /dev/null @@ -1,26 +0,0 @@ - - - -

            deeplearning/dl-op-linalg-reducemaxf-benchmark.json

            2025-07-27 17:54:34 UTC

            -
            ⚠ FAILED: JSON parse error: Expecting value
            -
            Console output -
            2025-07-27T14:27:39+00:00
            -Running ./dl-op-linalg-reducemaxf-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.06, 1.17, 1.29
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            -
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html deleted file mode 100644 index 71c06740..00000000 --- a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.html +++ /dev/null @@ -1,38 +0,0 @@ - - - -

            deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.json

            2025-07-27 17:54:34 UTC

            -

            dl-op-linalg-softmax-exp-sum-div-benchmark.json

            - - -
            NameTime (ms)CPU (ms)Iterations
            BM_SOFTMAXEXPSUMDIV_SCALAR0.005660.00566121,646
            BM_SOFTMAXEXPSUMDIV_AutoVectorization0.003850.00385181,826
            -
            Console output -
            2025-07-27T14:27:39+00:00
            -Running ./dl-op-linalg-softmax-exp-sum-div-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.06, 1.17, 1.29
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ---------------------------------------------------------------------------------
            -Benchmark                                      Time             CPU   Iterations
            ---------------------------------------------------------------------------------
            -BM_SOFTMAXEXPSUMDIV_SCALAR                 0.006 ms        0.006 ms       121646
            -BM_SOFTMAXEXPSUMDIV_AutoVectorization      0.004 ms        0.004 ms       181826
            ------------------------------------------------------------
            -Correctness Verification:
            -Transform case: PASS
            ------------------------------------------------------------
            -
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-matmul-transpose-b-benchmark.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-matmul-transpose-b-benchmark.html deleted file mode 100644 index c01788e3..00000000 --- a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-matmul-transpose-b-benchmark.html +++ /dev/null @@ -1,42 +0,0 @@ - - - -

            deeplearning/dl-op-matmul-transpose-b-benchmark.json

            2025-07-27 17:54:34 UTC

            -

            dl-op-matmul-transpose-b-benchmark.json

            - - - - -
            NameTime (ms)CPU (ms)Iterations
            DL_OPS_MATMUL_TRANSPOSE_B/scalar_O0/iterations:51.05e+031.05e+035
            DL_OPS_MATMUL_TRANSPOSE_B/scalar_O3/iterations:52782785
            DL_OPS_MATMUL_TRANSPOSE_B/scalar_O3_omp/iterations:532.322.45
            DL_OPS_MATMUL_TRANSPOSE_B/vec/iterations:585.585.55
            -
            Console output -
            2025-07-27T14:27:42+00:00
            -Running ./dl-op-matmul-transpose-b-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.06, 1.17, 1.29
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ------------------------------------------------------------------------------------------------
            -Benchmark                                                     Time             CPU   Iterations
            ------------------------------------------------------------------------------------------------
            -DL_OPS_MATMUL_TRANSPOSE_B/scalar_O0/iterations:5           1051 ms         1050 ms            5
            -DL_OPS_MATMUL_TRANSPOSE_B/scalar_O3/iterations:5            278 ms          278 ms            5
            -DL_OPS_MATMUL_TRANSPOSE_B/scalar_O3_omp/iterations:5       32.3 ms         22.4 ms            5
            -DL_OPS_MATMUL_TRANSPOSE_B/vec/iterations:5                 85.5 ms         85.5 ms            5
            ----------- Verification ----------
            -scalar_O3 PASS
            -scalar_O3_omp PASS
            -vec PASS
            -
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-tosa-transpose-benchmark.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-tosa-transpose-benchmark.html deleted file mode 100644 index 267f6470..00000000 --- a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/deeplearning/dl-op-tosa-transpose-benchmark.html +++ /dev/null @@ -1,36 +0,0 @@ - - - -

            deeplearning/dl-op-tosa-transpose-benchmark.json

            2025-07-27 17:54:34 UTC

            -

            dl-op-tosa-transpose-benchmark.json

            - - -
            NameTime (ms)CPU (ms)Iterations
            DL_OPS_TRANSPOSE_2D/scalar_O0/iterations:526.421.45
            DL_OPS_TRANSPOSE_2D/scalar_O3/iterations:518.913.45
            -
            Console output -
            2025-07-27T14:27:41+00:00
            -Running ./dl-op-tosa-transpose-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.06, 1.17, 1.29
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            --------------------------------------------------------------------------------------
            -Benchmark                                           Time             CPU   Iterations
            --------------------------------------------------------------------------------------
            -DL_OPS_TRANSPOSE_2D/scalar_O0/iterations:5       26.4 ms         21.4 ms            5
            -DL_OPS_TRANSPOSE_2D/scalar_O3/iterations:5       18.9 ms         13.4 ms            5
            ----------- Verification ----------
            -scalar_O3 PASS
            -
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html deleted file mode 100644 index 01c24ea8..00000000 --- a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ /dev/null @@ -1,95 +0,0 @@ - - - -

            imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:54:34 UTC

            -

            AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            - - - - - - - - - - - - - - - - - - - - - - -
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.884.88143
            MLIR_Conv2D/17.197.1997
            Buddy_Conv2D/10.4180.4181,675
            Buddy_Corr2D_Constant_Padding/11.061.06666
            OpenCV_Filter2D_Constant_Padding/11.861.86376
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,817
            Buddy_Resize2D_Bilinear_Interpolation/10.260.262,689
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006650.00665105,253
            OpenCV_Resize2D_Bilinear_Interpolation/10.0140.01449,956
            Buddy_Erosion2D_Constant_Padding/10.2150.2153,274
            Buddy_Dilation2D_Constant_Padding/10.2140.2143,255
            Buddy_Opening2D_Constant_Padding/10.3140.3142,261
            Buddy_Closing2D_Constant_Padding/10.3180.3182,259
            Buddy_TopHat2D_Constant_Padding/10.810.81835
            Buddy_BottomHat2D_Constant_Padding/10.7870.787848
            OpenCV_Erode2D_Constant_Padding/10.1370.1375,119
            OpenCV_Opening2D_Constant_Padding/10.2260.2263,092
            OpenCV_Closing2D_Constant_Padding/10.2270.2273,082
            OpenCV_TopHat2D_Constant_Padding/10.260.262,689
            OpenCV_BottomHat2D_Constant_Padding/10.2590.2592,705
            OpenCV_MorphGrad2D_Constant_Padding/10.2520.2522,779
            OpenCV_Dilate2D_Constant_Padding/10.1370.1375,113
            -
            Console output -
            2025-06-01T10:09:28+00:00
            -Running ./bin/image-processing-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.00, 1.13, 1.85
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ---------------------------------------------------------------------------------------------
            -Benchmark                                                  Time             CPU   Iterations
            ---------------------------------------------------------------------------------------------
            -Eigen_Convolve2D/1                                      4.88 ms         4.88 ms          143
            -MLIR_Conv2D/1                                           7.19 ms         7.19 ms           97
            -Buddy_Conv2D/1                                         0.418 ms        0.418 ms         1675
            -Buddy_Corr2D_Constant_Padding/1                         1.06 ms         1.06 ms          666
            -OpenCV_Filter2D_Constant_Padding/1                      1.86 ms         1.86 ms          376
            -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4817
            -Buddy_Resize2D_Bilinear_Interpolation/1                0.260 ms        0.260 ms         2689
            -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       105253
            -OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49956
            -Buddy_Erosion2D_Constant_Padding/1                     0.215 ms        0.215 ms         3274
            -Buddy_Dilation2D_Constant_Padding/1                    0.214 ms        0.214 ms         3255
            -Buddy_Opening2D_Constant_Padding/1                     0.314 ms        0.314 ms         2261
            -Buddy_Closing2D_Constant_Padding/1                     0.318 ms        0.318 ms         2259
            -Buddy_TopHat2D_Constant_Padding/1                      0.810 ms        0.810 ms          835
            -Buddy_BottomHat2D_Constant_Padding/1                   0.787 ms        0.787 ms          848
            -OpenCV_Erode2D_Constant_Padding/1                      0.137 ms        0.137 ms         5119
            -OpenCV_Opening2D_Constant_Padding/1                    0.226 ms        0.226 ms         3092
            -OpenCV_Closing2D_Constant_Padding/1                    0.227 ms        0.227 ms         3082
            -OpenCV_TopHat2D_Constant_Padding/1                     0.260 ms        0.260 ms         2689
            -OpenCV_BottomHat2D_Constant_Padding/1                  0.259 ms        0.259 ms         2705
            -OpenCV_MorphGrad2D_Constant_Padding/1                  0.252 ms        0.252 ms         2779
            -OpenCV_Dilate2D_Constant_Padding/1                     0.137 ms        0.137 ms         5113
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html deleted file mode 100644 index 1b11967a..00000000 --- a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ /dev/null @@ -1,95 +0,0 @@ - - - -

            imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:54:34 UTC

            -

            AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            - - - - - - - - - - - - - - - - - - - - - - -
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.884.88143
            MLIR_Conv2D/17.187.1897
            Buddy_Conv2D/10.4180.4181,675
            Buddy_Corr2D_Constant_Padding/11.061.06662
            OpenCV_Filter2D_Constant_Padding/11.861.86376
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,857
            Buddy_Resize2D_Bilinear_Interpolation/10.260.262,693
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006660.00666105,155
            OpenCV_Resize2D_Bilinear_Interpolation/10.01410.014149,833
            Buddy_Erosion2D_Constant_Padding/10.2150.2153,267
            Buddy_Dilation2D_Constant_Padding/10.2140.2143,262
            Buddy_Opening2D_Constant_Padding/10.3090.3092,259
            Buddy_Closing2D_Constant_Padding/10.3110.3112,232
            Buddy_TopHat2D_Constant_Padding/10.8010.801854
            Buddy_BottomHat2D_Constant_Padding/10.7950.795833
            OpenCV_Erode2D_Constant_Padding/10.1360.1365,118
            OpenCV_Opening2D_Constant_Padding/10.2240.2243,117
            OpenCV_Closing2D_Constant_Padding/10.2260.2263,087
            OpenCV_TopHat2D_Constant_Padding/10.2560.2562,731
            OpenCV_BottomHat2D_Constant_Padding/10.260.262,693
            OpenCV_MorphGrad2D_Constant_Padding/10.2490.2492,799
            OpenCV_Dilate2D_Constant_Padding/10.1390.1395,051
            -
            Console output -
            2025-06-01T10:09:52+00:00
            -Running ./bin/image-processing-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.00, 1.12, 1.83
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ---------------------------------------------------------------------------------------------
            -Benchmark                                                  Time             CPU   Iterations
            ---------------------------------------------------------------------------------------------
            -Eigen_Convolve2D/1                                      4.88 ms         4.88 ms          143
            -MLIR_Conv2D/1                                           7.18 ms         7.18 ms           97
            -Buddy_Conv2D/1                                         0.418 ms        0.418 ms         1675
            -Buddy_Corr2D_Constant_Padding/1                         1.06 ms         1.06 ms          662
            -OpenCV_Filter2D_Constant_Padding/1                      1.86 ms         1.86 ms          376
            -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4857
            -Buddy_Resize2D_Bilinear_Interpolation/1                0.260 ms        0.260 ms         2693
            -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       105155
            -OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49833
            -Buddy_Erosion2D_Constant_Padding/1                     0.215 ms        0.215 ms         3267
            -Buddy_Dilation2D_Constant_Padding/1                    0.214 ms        0.214 ms         3262
            -Buddy_Opening2D_Constant_Padding/1                     0.309 ms        0.309 ms         2259
            -Buddy_Closing2D_Constant_Padding/1                     0.311 ms        0.311 ms         2232
            -Buddy_TopHat2D_Constant_Padding/1                      0.801 ms        0.801 ms          854
            -Buddy_BottomHat2D_Constant_Padding/1                   0.795 ms        0.795 ms          833
            -OpenCV_Erode2D_Constant_Padding/1                      0.136 ms        0.136 ms         5118
            -OpenCV_Opening2D_Constant_Padding/1                    0.224 ms        0.224 ms         3117
            -OpenCV_Closing2D_Constant_Padding/1                    0.226 ms        0.226 ms         3087
            -OpenCV_TopHat2D_Constant_Padding/1                     0.256 ms        0.256 ms         2731
            -OpenCV_BottomHat2D_Constant_Padding/1                  0.260 ms        0.260 ms         2693
            -OpenCV_MorphGrad2D_Constant_Padding/1                  0.249 ms        0.249 ms         2799
            -OpenCV_Dilate2D_Constant_Padding/1                     0.139 ms        0.139 ms         5051
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html deleted file mode 100644 index 35599864..00000000 --- a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ /dev/null @@ -1,95 +0,0 @@ - - - -

            imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:54:34 UTC

            -

            AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            - - - - - - - - - - - - - - - - - - - - - - -
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.511.561
            MLIR_Conv2D/1292925
            Buddy_Conv2D/11.111.11632
            Buddy_Corr2D_Constant_Padding/11.741.74400
            OpenCV_Filter2D_Constant_Padding/12.682.68262
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1420.1424,855
            Buddy_Resize2D_Bilinear_Interpolation/10.2610.2612,692
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006630.00663105,416
            OpenCV_Resize2D_Bilinear_Interpolation/10.0140.01449,870
            Buddy_Erosion2D_Constant_Padding/10.2140.2143,258
            Buddy_Dilation2D_Constant_Padding/10.2140.2143,246
            Buddy_Opening2D_Constant_Padding/10.320.322,260
            Buddy_Closing2D_Constant_Padding/10.310.312,223
            Buddy_TopHat2D_Constant_Padding/10.8060.806827
            Buddy_BottomHat2D_Constant_Padding/10.820.82852
            OpenCV_Erode2D_Constant_Padding/10.1370.1375,096
            OpenCV_Opening2D_Constant_Padding/10.2230.2233,136
            OpenCV_Closing2D_Constant_Padding/10.2270.2273,085
            OpenCV_TopHat2D_Constant_Padding/10.260.262,693
            OpenCV_BottomHat2D_Constant_Padding/10.260.262,686
            OpenCV_MorphGrad2D_Constant_Padding/10.2540.2542,746
            OpenCV_Dilate2D_Constant_Padding/10.1340.1345,208
            -
            Console output -
            2025-06-01T10:10:17+00:00
            -Running ./bin/image-processing-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.00, 1.11, 1.81
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ---------------------------------------------------------------------------------------------
            -Benchmark                                                  Time             CPU   Iterations
            ---------------------------------------------------------------------------------------------
            -Eigen_Convolve2D/1                                      11.5 ms         11.5 ms           61
            -MLIR_Conv2D/1                                           29.0 ms         29.0 ms           25
            -Buddy_Conv2D/1                                          1.11 ms         1.11 ms          632
            -Buddy_Corr2D_Constant_Padding/1                         1.74 ms         1.74 ms          400
            -OpenCV_Filter2D_Constant_Padding/1                      2.68 ms         2.68 ms          262
            -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.142 ms        0.142 ms         4855
            -Buddy_Resize2D_Bilinear_Interpolation/1                0.261 ms        0.261 ms         2692
            -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       105416
            -OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49870
            -Buddy_Erosion2D_Constant_Padding/1                     0.214 ms        0.214 ms         3258
            -Buddy_Dilation2D_Constant_Padding/1                    0.214 ms        0.214 ms         3246
            -Buddy_Opening2D_Constant_Padding/1                     0.320 ms        0.320 ms         2260
            -Buddy_Closing2D_Constant_Padding/1                     0.310 ms        0.310 ms         2223
            -Buddy_TopHat2D_Constant_Padding/1                      0.806 ms        0.806 ms          827
            -Buddy_BottomHat2D_Constant_Padding/1                   0.820 ms        0.820 ms          852
            -OpenCV_Erode2D_Constant_Padding/1                      0.137 ms        0.137 ms         5096
            -OpenCV_Opening2D_Constant_Padding/1                    0.223 ms        0.223 ms         3136
            -OpenCV_Closing2D_Constant_Padding/1                    0.227 ms        0.227 ms         3085
            -OpenCV_TopHat2D_Constant_Padding/1                     0.260 ms        0.260 ms         2693
            -OpenCV_BottomHat2D_Constant_Padding/1                  0.260 ms        0.260 ms         2686
            -OpenCV_MorphGrad2D_Constant_Padding/1                  0.254 ms        0.254 ms         2746
            -OpenCV_Dilate2D_Constant_Padding/1                     0.134 ms        0.134 ms         5208
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html deleted file mode 100644 index 714bf9ca..00000000 --- a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ /dev/null @@ -1,95 +0,0 @@ - - - -

            imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:54:34 UTC

            -

            AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            - - - - - - - - - - - - - - - - - - - - - - -
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.611.661
            MLIR_Conv2D/1292925
            Buddy_Conv2D/11.021.02685
            Buddy_Corr2D_Constant_Padding/11.751.75400
            OpenCV_Filter2D_Constant_Padding/12.682.68261
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1420.1424,858
            Buddy_Resize2D_Bilinear_Interpolation/10.260.262,692
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006650.00665105,372
            OpenCV_Resize2D_Bilinear_Interpolation/10.0140.01449,847
            Buddy_Erosion2D_Constant_Padding/10.2140.2143,249
            Buddy_Dilation2D_Constant_Padding/10.2130.2133,265
            Buddy_Opening2D_Constant_Padding/10.3140.3142,214
            Buddy_Closing2D_Constant_Padding/10.3080.3082,229
            Buddy_TopHat2D_Constant_Padding/10.790.79828
            Buddy_BottomHat2D_Constant_Padding/10.7770.777854
            OpenCV_Erode2D_Constant_Padding/10.1370.1375,075
            OpenCV_Opening2D_Constant_Padding/10.2250.2253,111
            OpenCV_Closing2D_Constant_Padding/10.2290.2293,056
            OpenCV_TopHat2D_Constant_Padding/10.2620.2622,672
            OpenCV_BottomHat2D_Constant_Padding/10.2640.2642,653
            OpenCV_MorphGrad2D_Constant_Padding/10.2540.2542,750
            OpenCV_Dilate2D_Constant_Padding/10.1350.1355,201
            -
            Console output -
            2025-06-01T10:10:41+00:00
            -Running ./bin/image-processing-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.00, 1.10, 1.79
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ---------------------------------------------------------------------------------------------
            -Benchmark                                                  Time             CPU   Iterations
            ---------------------------------------------------------------------------------------------
            -Eigen_Convolve2D/1                                      11.6 ms         11.6 ms           61
            -MLIR_Conv2D/1                                           29.0 ms         29.0 ms           25
            -Buddy_Conv2D/1                                          1.02 ms         1.02 ms          685
            -Buddy_Corr2D_Constant_Padding/1                         1.75 ms         1.75 ms          400
            -OpenCV_Filter2D_Constant_Padding/1                      2.68 ms         2.68 ms          261
            -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.142 ms        0.142 ms         4858
            -Buddy_Resize2D_Bilinear_Interpolation/1                0.260 ms        0.260 ms         2692
            -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       105372
            -OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49847
            -Buddy_Erosion2D_Constant_Padding/1                     0.214 ms        0.214 ms         3249
            -Buddy_Dilation2D_Constant_Padding/1                    0.213 ms        0.213 ms         3265
            -Buddy_Opening2D_Constant_Padding/1                     0.314 ms        0.314 ms         2214
            -Buddy_Closing2D_Constant_Padding/1                     0.308 ms        0.308 ms         2229
            -Buddy_TopHat2D_Constant_Padding/1                      0.790 ms        0.790 ms          828
            -Buddy_BottomHat2D_Constant_Padding/1                   0.777 ms        0.777 ms          854
            -OpenCV_Erode2D_Constant_Padding/1                      0.137 ms        0.137 ms         5075
            -OpenCV_Opening2D_Constant_Padding/1                    0.225 ms        0.225 ms         3111
            -OpenCV_Closing2D_Constant_Padding/1                    0.229 ms        0.229 ms         3056
            -OpenCV_TopHat2D_Constant_Padding/1                     0.262 ms        0.262 ms         2672
            -OpenCV_BottomHat2D_Constant_Padding/1                  0.264 ms        0.264 ms         2653
            -OpenCV_MorphGrad2D_Constant_Padding/1                  0.254 ms        0.254 ms         2750
            -OpenCV_Dilate2D_Constant_Padding/1                     0.135 ms        0.135 ms         5201
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html deleted file mode 100644 index eab7f8ec..00000000 --- a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ /dev/null @@ -1,95 +0,0 @@ - - - -

            imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:54:34 UTC

            -

            AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            - - - - - - - - - - - - - - - - - - - - - - -
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.874.87144
            MLIR_Conv2D/17.387.3895
            Buddy_Conv2D/10.3120.3122,241
            Buddy_Corr2D_Constant_Padding/10.8210.821849
            OpenCV_Filter2D_Constant_Padding/11.281.28547
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1460.1464,774
            Buddy_Resize2D_Bilinear_Interpolation/10.2670.2672,627
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.00670.0067103,069
            OpenCV_Resize2D_Bilinear_Interpolation/10.0140.01449,911
            Buddy_Erosion2D_Constant_Padding/10.2140.2143,259
            Buddy_Dilation2D_Constant_Padding/10.2150.2153,230
            Buddy_Opening2D_Constant_Padding/10.3090.3092,255
            Buddy_Closing2D_Constant_Padding/10.3090.3092,273
            Buddy_TopHat2D_Constant_Padding/10.7760.776855
            Buddy_BottomHat2D_Constant_Padding/10.7740.774856
            OpenCV_Erode2D_Constant_Padding/10.1360.1365,148
            OpenCV_Opening2D_Constant_Padding/10.2190.2193,185
            OpenCV_Closing2D_Constant_Padding/10.2230.2233,143
            OpenCV_TopHat2D_Constant_Padding/10.2590.2592,699
            OpenCV_BottomHat2D_Constant_Padding/10.2580.2582,714
            OpenCV_MorphGrad2D_Constant_Padding/10.2510.2512,791
            OpenCV_Dilate2D_Constant_Padding/10.1370.1375,112
            -
            Console output -
            2025-06-01T10:05:28+00:00
            -Running ./bin/image-processing-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.19, 1.29, 2.11
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ---------------------------------------------------------------------------------------------
            -Benchmark                                                  Time             CPU   Iterations
            ---------------------------------------------------------------------------------------------
            -Eigen_Convolve2D/1                                      4.87 ms         4.87 ms          144
            -MLIR_Conv2D/1                                           7.38 ms         7.38 ms           95
            -Buddy_Conv2D/1                                         0.312 ms        0.312 ms         2241
            -Buddy_Corr2D_Constant_Padding/1                        0.821 ms        0.821 ms          849
            -OpenCV_Filter2D_Constant_Padding/1                      1.28 ms         1.28 ms          547
            -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.146 ms        0.146 ms         4774
            -Buddy_Resize2D_Bilinear_Interpolation/1                0.267 ms        0.267 ms         2627
            -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       103069
            -OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49911
            -Buddy_Erosion2D_Constant_Padding/1                     0.214 ms        0.214 ms         3259
            -Buddy_Dilation2D_Constant_Padding/1                    0.215 ms        0.215 ms         3230
            -Buddy_Opening2D_Constant_Padding/1                     0.309 ms        0.309 ms         2255
            -Buddy_Closing2D_Constant_Padding/1                     0.309 ms        0.309 ms         2273
            -Buddy_TopHat2D_Constant_Padding/1                      0.776 ms        0.776 ms          855
            -Buddy_BottomHat2D_Constant_Padding/1                   0.774 ms        0.774 ms          856
            -OpenCV_Erode2D_Constant_Padding/1                      0.136 ms        0.136 ms         5148
            -OpenCV_Opening2D_Constant_Padding/1                    0.219 ms        0.219 ms         3185
            -OpenCV_Closing2D_Constant_Padding/1                    0.223 ms        0.223 ms         3143
            -OpenCV_TopHat2D_Constant_Padding/1                     0.259 ms        0.259 ms         2699
            -OpenCV_BottomHat2D_Constant_Padding/1                  0.258 ms        0.258 ms         2714
            -OpenCV_MorphGrad2D_Constant_Padding/1                  0.251 ms        0.251 ms         2791
            -OpenCV_Dilate2D_Constant_Padding/1                     0.137 ms        0.137 ms         5112
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html deleted file mode 100644 index 50abb917..00000000 --- a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ /dev/null @@ -1,95 +0,0 @@ - - - -

            imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:54:34 UTC

            -

            AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            - - - - - - - - - - - - - - - - - - - - - - -
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.94.9144
            MLIR_Conv2D/17.217.2197
            Buddy_Conv2D/10.3110.312,257
            Buddy_Corr2D_Constant_Padding/10.7980.798878
            OpenCV_Filter2D_Constant_Padding/11.251.25560
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,872
            Buddy_Resize2D_Bilinear_Interpolation/10.2620.2622,654
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006660.00666105,278
            OpenCV_Resize2D_Bilinear_Interpolation/10.0140.01449,913
            Buddy_Erosion2D_Constant_Padding/10.2140.2143,221
            Buddy_Dilation2D_Constant_Padding/10.2140.2143,271
            Buddy_Opening2D_Constant_Padding/10.3160.3162,213
            Buddy_Closing2D_Constant_Padding/10.3110.3112,230
            Buddy_TopHat2D_Constant_Padding/10.80.8866
            Buddy_BottomHat2D_Constant_Padding/10.7970.797846
            OpenCV_Erode2D_Constant_Padding/10.1380.1385,058
            OpenCV_Opening2D_Constant_Padding/10.2220.2223,149
            OpenCV_Closing2D_Constant_Padding/10.2210.2213,169
            OpenCV_TopHat2D_Constant_Padding/10.2570.2572,725
            OpenCV_BottomHat2D_Constant_Padding/10.2570.2572,715
            OpenCV_MorphGrad2D_Constant_Padding/10.250.252,798
            OpenCV_Dilate2D_Constant_Padding/10.1370.1375,116
            -
            Console output -
            2025-06-01T10:05:52+00:00
            -Running ./bin/image-processing-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.20, 1.29, 2.09
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ---------------------------------------------------------------------------------------------
            -Benchmark                                                  Time             CPU   Iterations
            ---------------------------------------------------------------------------------------------
            -Eigen_Convolve2D/1                                      4.90 ms         4.90 ms          144
            -MLIR_Conv2D/1                                           7.21 ms         7.21 ms           97
            -Buddy_Conv2D/1                                         0.311 ms        0.310 ms         2257
            -Buddy_Corr2D_Constant_Padding/1                        0.798 ms        0.798 ms          878
            -OpenCV_Filter2D_Constant_Padding/1                      1.25 ms         1.25 ms          560
            -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4872
            -Buddy_Resize2D_Bilinear_Interpolation/1                0.262 ms        0.262 ms         2654
            -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       105278
            -OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49913
            -Buddy_Erosion2D_Constant_Padding/1                     0.214 ms        0.214 ms         3221
            -Buddy_Dilation2D_Constant_Padding/1                    0.214 ms        0.214 ms         3271
            -Buddy_Opening2D_Constant_Padding/1                     0.316 ms        0.316 ms         2213
            -Buddy_Closing2D_Constant_Padding/1                     0.311 ms        0.311 ms         2230
            -Buddy_TopHat2D_Constant_Padding/1                      0.800 ms        0.800 ms          866
            -Buddy_BottomHat2D_Constant_Padding/1                   0.797 ms        0.797 ms          846
            -OpenCV_Erode2D_Constant_Padding/1                      0.138 ms        0.138 ms         5058
            -OpenCV_Opening2D_Constant_Padding/1                    0.222 ms        0.222 ms         3149
            -OpenCV_Closing2D_Constant_Padding/1                    0.221 ms        0.221 ms         3169
            -OpenCV_TopHat2D_Constant_Padding/1                     0.257 ms        0.257 ms         2725
            -OpenCV_BottomHat2D_Constant_Padding/1                  0.257 ms        0.257 ms         2715
            -OpenCV_MorphGrad2D_Constant_Padding/1                  0.250 ms        0.250 ms         2798
            -OpenCV_Dilate2D_Constant_Padding/1                     0.137 ms        0.137 ms         5116
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html deleted file mode 100644 index 3389cc83..00000000 --- a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ /dev/null @@ -1,95 +0,0 @@ - - - -

            imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:54:34 UTC

            -

            AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            - - - - - - - - - - - - - - - - - - - - - - -
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.774.77147
            MLIR_Conv2D/17.27.297
            Buddy_Conv2D/10.310.312,252
            Buddy_Corr2D_Constant_Padding/10.8020.802868
            OpenCV_Filter2D_Constant_Padding/11.251.25560
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,854
            Buddy_Resize2D_Bilinear_Interpolation/10.2610.2612,649
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006690.00669105,099
            OpenCV_Resize2D_Bilinear_Interpolation/10.0140.01449,931
            Buddy_Erosion2D_Constant_Padding/10.2140.2143,253
            Buddy_Dilation2D_Constant_Padding/10.2140.2143,245
            Buddy_Opening2D_Constant_Padding/10.3160.3162,229
            Buddy_Closing2D_Constant_Padding/10.3130.3132,256
            Buddy_TopHat2D_Constant_Padding/10.8040.804822
            Buddy_BottomHat2D_Constant_Padding/10.7990.799842
            OpenCV_Erode2D_Constant_Padding/10.1350.1355,153
            OpenCV_Opening2D_Constant_Padding/10.220.223,158
            OpenCV_Closing2D_Constant_Padding/10.2210.2213,163
            OpenCV_TopHat2D_Constant_Padding/10.2570.2572,727
            OpenCV_BottomHat2D_Constant_Padding/10.2550.2552,742
            OpenCV_MorphGrad2D_Constant_Padding/10.2480.2482,826
            OpenCV_Dilate2D_Constant_Padding/10.1360.1365,148
            -
            Console output -
            2025-06-01T10:06:16+00:00
            -Running ./bin/image-processing-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.13, 1.26, 2.06
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ---------------------------------------------------------------------------------------------
            -Benchmark                                                  Time             CPU   Iterations
            ---------------------------------------------------------------------------------------------
            -Eigen_Convolve2D/1                                      4.77 ms         4.77 ms          147
            -MLIR_Conv2D/1                                           7.20 ms         7.20 ms           97
            -Buddy_Conv2D/1                                         0.310 ms        0.310 ms         2252
            -Buddy_Corr2D_Constant_Padding/1                        0.802 ms        0.802 ms          868
            -OpenCV_Filter2D_Constant_Padding/1                      1.25 ms         1.25 ms          560
            -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4854
            -Buddy_Resize2D_Bilinear_Interpolation/1                0.261 ms        0.261 ms         2649
            -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       105099
            -OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49931
            -Buddy_Erosion2D_Constant_Padding/1                     0.214 ms        0.214 ms         3253
            -Buddy_Dilation2D_Constant_Padding/1                    0.214 ms        0.214 ms         3245
            -Buddy_Opening2D_Constant_Padding/1                     0.316 ms        0.316 ms         2229
            -Buddy_Closing2D_Constant_Padding/1                     0.313 ms        0.313 ms         2256
            -Buddy_TopHat2D_Constant_Padding/1                      0.804 ms        0.804 ms          822
            -Buddy_BottomHat2D_Constant_Padding/1                   0.799 ms        0.799 ms          842
            -OpenCV_Erode2D_Constant_Padding/1                      0.135 ms        0.135 ms         5153
            -OpenCV_Opening2D_Constant_Padding/1                    0.220 ms        0.220 ms         3158
            -OpenCV_Closing2D_Constant_Padding/1                    0.221 ms        0.221 ms         3163
            -OpenCV_TopHat2D_Constant_Padding/1                     0.257 ms        0.257 ms         2727
            -OpenCV_BottomHat2D_Constant_Padding/1                  0.255 ms        0.255 ms         2742
            -OpenCV_MorphGrad2D_Constant_Padding/1                  0.248 ms        0.248 ms         2826
            -OpenCV_Dilate2D_Constant_Padding/1                     0.136 ms        0.136 ms         5148
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html deleted file mode 100644 index b2958986..00000000 --- a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ /dev/null @@ -1,95 +0,0 @@ - - - -

            imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:54:34 UTC

            -

            AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            - - - - - - - - - - - - - - - - - - - - - - -
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.914.91143
            MLIR_Conv2D/17.177.1798
            Buddy_Conv2D/10.310.312,260
            Buddy_Corr2D_Constant_Padding/10.7950.795875
            OpenCV_Filter2D_Constant_Padding/11.251.25560
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,871
            Buddy_Resize2D_Bilinear_Interpolation/10.2610.2612,651
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006690.00669104,620
            OpenCV_Resize2D_Bilinear_Interpolation/10.0140.01449,783
            Buddy_Erosion2D_Constant_Padding/10.2180.2183,101
            Buddy_Dilation2D_Constant_Padding/10.2140.2143,270
            Buddy_Opening2D_Constant_Padding/10.3190.3192,180
            Buddy_Closing2D_Constant_Padding/10.3120.3122,262
            Buddy_TopHat2D_Constant_Padding/10.8140.814841
            Buddy_BottomHat2D_Constant_Padding/10.820.82849
            OpenCV_Erode2D_Constant_Padding/10.1350.1355,157
            OpenCV_Opening2D_Constant_Padding/10.2190.2193,187
            OpenCV_Closing2D_Constant_Padding/10.2180.2183,207
            OpenCV_TopHat2D_Constant_Padding/10.2550.2552,745
            OpenCV_BottomHat2D_Constant_Padding/10.2530.2532,766
            OpenCV_MorphGrad2D_Constant_Padding/10.2490.2492,808
            OpenCV_Dilate2D_Constant_Padding/10.1370.1375,102
            -
            Console output -
            2025-06-01T10:06:40+00:00
            -Running ./bin/image-processing-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.08, 1.24, 2.03
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ---------------------------------------------------------------------------------------------
            -Benchmark                                                  Time             CPU   Iterations
            ---------------------------------------------------------------------------------------------
            -Eigen_Convolve2D/1                                      4.91 ms         4.91 ms          143
            -MLIR_Conv2D/1                                           7.17 ms         7.17 ms           98
            -Buddy_Conv2D/1                                         0.310 ms        0.310 ms         2260
            -Buddy_Corr2D_Constant_Padding/1                        0.795 ms        0.795 ms          875
            -OpenCV_Filter2D_Constant_Padding/1                      1.25 ms         1.25 ms          560
            -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4871
            -Buddy_Resize2D_Bilinear_Interpolation/1                0.261 ms        0.261 ms         2651
            -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       104620
            -OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49783
            -Buddy_Erosion2D_Constant_Padding/1                     0.218 ms        0.218 ms         3101
            -Buddy_Dilation2D_Constant_Padding/1                    0.214 ms        0.214 ms         3270
            -Buddy_Opening2D_Constant_Padding/1                     0.319 ms        0.319 ms         2180
            -Buddy_Closing2D_Constant_Padding/1                     0.312 ms        0.312 ms         2262
            -Buddy_TopHat2D_Constant_Padding/1                      0.814 ms        0.814 ms          841
            -Buddy_BottomHat2D_Constant_Padding/1                   0.820 ms        0.820 ms          849
            -OpenCV_Erode2D_Constant_Padding/1                      0.135 ms        0.135 ms         5157
            -OpenCV_Opening2D_Constant_Padding/1                    0.219 ms        0.219 ms         3187
            -OpenCV_Closing2D_Constant_Padding/1                    0.218 ms        0.218 ms         3207
            -OpenCV_TopHat2D_Constant_Padding/1                     0.255 ms        0.255 ms         2745
            -OpenCV_BottomHat2D_Constant_Padding/1                  0.253 ms        0.253 ms         2766
            -OpenCV_MorphGrad2D_Constant_Padding/1                  0.249 ms        0.249 ms         2808
            -OpenCV_Dilate2D_Constant_Padding/1                     0.137 ms        0.137 ms         5102
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html deleted file mode 100644 index 13bbced9..00000000 --- a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ /dev/null @@ -1,95 +0,0 @@ - - - -

            imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:54:34 UTC

            -

            AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            - - - - - - - - - - - - - - - - - - - - - - -
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.611.660
            MLIR_Conv2D/129.229.224
            Buddy_Conv2D/11.311.31536
            Buddy_Corr2D_Constant_Padding/12.332.33300
            OpenCV_Filter2D_Constant_Padding/14.114.11170
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,854
            Buddy_Resize2D_Bilinear_Interpolation/10.2610.2612,689
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006650.00665105,080
            OpenCV_Resize2D_Bilinear_Interpolation/10.0140.01449,721
            Buddy_Erosion2D_Constant_Padding/10.2160.2163,235
            Buddy_Dilation2D_Constant_Padding/10.2140.2143,260
            Buddy_Opening2D_Constant_Padding/10.310.312,249
            Buddy_Closing2D_Constant_Padding/10.3120.3122,139
            Buddy_TopHat2D_Constant_Padding/10.780.78826
            Buddy_BottomHat2D_Constant_Padding/10.7820.782830
            OpenCV_Erode2D_Constant_Padding/10.1380.1385,049
            OpenCV_Opening2D_Constant_Padding/10.2260.2263,095
            OpenCV_Closing2D_Constant_Padding/10.2250.2253,109
            OpenCV_TopHat2D_Constant_Padding/10.260.262,690
            OpenCV_BottomHat2D_Constant_Padding/10.260.262,688
            OpenCV_MorphGrad2D_Constant_Padding/10.2530.2532,759
            OpenCV_Dilate2D_Constant_Padding/10.1360.1365,116
            -
            Console output -
            2025-06-01T10:07:04+00:00
            -Running ./bin/image-processing-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.05, 1.22, 2.00
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ---------------------------------------------------------------------------------------------
            -Benchmark                                                  Time             CPU   Iterations
            ---------------------------------------------------------------------------------------------
            -Eigen_Convolve2D/1                                      11.6 ms         11.6 ms           60
            -MLIR_Conv2D/1                                           29.2 ms         29.2 ms           24
            -Buddy_Conv2D/1                                          1.31 ms         1.31 ms          536
            -Buddy_Corr2D_Constant_Padding/1                         2.33 ms         2.33 ms          300
            -OpenCV_Filter2D_Constant_Padding/1                      4.11 ms         4.11 ms          170
            -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4854
            -Buddy_Resize2D_Bilinear_Interpolation/1                0.261 ms        0.261 ms         2689
            -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       105080
            -OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49721
            -Buddy_Erosion2D_Constant_Padding/1                     0.216 ms        0.216 ms         3235
            -Buddy_Dilation2D_Constant_Padding/1                    0.214 ms        0.214 ms         3260
            -Buddy_Opening2D_Constant_Padding/1                     0.310 ms        0.310 ms         2249
            -Buddy_Closing2D_Constant_Padding/1                     0.312 ms        0.312 ms         2139
            -Buddy_TopHat2D_Constant_Padding/1                      0.780 ms        0.780 ms          826
            -Buddy_BottomHat2D_Constant_Padding/1                   0.782 ms        0.782 ms          830
            -OpenCV_Erode2D_Constant_Padding/1                      0.138 ms        0.138 ms         5049
            -OpenCV_Opening2D_Constant_Padding/1                    0.226 ms        0.226 ms         3095
            -OpenCV_Closing2D_Constant_Padding/1                    0.225 ms        0.225 ms         3109
            -OpenCV_TopHat2D_Constant_Padding/1                     0.260 ms        0.260 ms         2690
            -OpenCV_BottomHat2D_Constant_Padding/1                  0.260 ms        0.260 ms         2688
            -OpenCV_MorphGrad2D_Constant_Padding/1                  0.253 ms        0.253 ms         2759
            -OpenCV_Dilate2D_Constant_Padding/1                     0.136 ms        0.136 ms         5116
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html deleted file mode 100644 index 40dbd47e..00000000 --- a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ /dev/null @@ -1,95 +0,0 @@ - - - -

            imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:54:34 UTC

            -

            AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            - - - - - - - - - - - - - - - - - - - - - - -
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.511.560
            MLIR_Conv2D/129.129.124
            Buddy_Conv2D/11.381.38508
            Buddy_Corr2D_Constant_Padding/12.322.32301
            OpenCV_Filter2D_Constant_Padding/14.14.1170
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,857
            Buddy_Resize2D_Bilinear_Interpolation/10.2610.2612,686
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006660.00666105,064
            OpenCV_Resize2D_Bilinear_Interpolation/10.0140.01449,925
            Buddy_Erosion2D_Constant_Padding/10.2140.2143,267
            Buddy_Dilation2D_Constant_Padding/10.2140.2143,269
            Buddy_Opening2D_Constant_Padding/10.3160.3162,235
            Buddy_Closing2D_Constant_Padding/10.3150.3152,209
            Buddy_TopHat2D_Constant_Padding/10.8010.801841
            Buddy_BottomHat2D_Constant_Padding/10.7850.785852
            OpenCV_Erode2D_Constant_Padding/10.1360.1365,129
            OpenCV_Opening2D_Constant_Padding/10.2250.2253,105
            OpenCV_Closing2D_Constant_Padding/10.2270.2273,082
            OpenCV_TopHat2D_Constant_Padding/10.2610.2612,679
            OpenCV_BottomHat2D_Constant_Padding/10.2620.2622,672
            OpenCV_MorphGrad2D_Constant_Padding/10.2540.2542,751
            OpenCV_Dilate2D_Constant_Padding/10.1360.1365,094
            -
            Console output -
            2025-06-01T10:07:28+00:00
            -Running ./bin/image-processing-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.03, 1.20, 1.98
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ---------------------------------------------------------------------------------------------
            -Benchmark                                                  Time             CPU   Iterations
            ---------------------------------------------------------------------------------------------
            -Eigen_Convolve2D/1                                      11.5 ms         11.5 ms           60
            -MLIR_Conv2D/1                                           29.1 ms         29.1 ms           24
            -Buddy_Conv2D/1                                          1.38 ms         1.38 ms          508
            -Buddy_Corr2D_Constant_Padding/1                         2.32 ms         2.32 ms          301
            -OpenCV_Filter2D_Constant_Padding/1                      4.10 ms         4.10 ms          170
            -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4857
            -Buddy_Resize2D_Bilinear_Interpolation/1                0.261 ms        0.261 ms         2686
            -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       105064
            -OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49925
            -Buddy_Erosion2D_Constant_Padding/1                     0.214 ms        0.214 ms         3267
            -Buddy_Dilation2D_Constant_Padding/1                    0.214 ms        0.214 ms         3269
            -Buddy_Opening2D_Constant_Padding/1                     0.316 ms        0.316 ms         2235
            -Buddy_Closing2D_Constant_Padding/1                     0.315 ms        0.315 ms         2209
            -Buddy_TopHat2D_Constant_Padding/1                      0.801 ms        0.801 ms          841
            -Buddy_BottomHat2D_Constant_Padding/1                   0.785 ms        0.785 ms          852
            -OpenCV_Erode2D_Constant_Padding/1                      0.136 ms        0.136 ms         5129
            -OpenCV_Opening2D_Constant_Padding/1                    0.225 ms        0.225 ms         3105
            -OpenCV_Closing2D_Constant_Padding/1                    0.227 ms        0.227 ms         3082
            -OpenCV_TopHat2D_Constant_Padding/1                     0.261 ms        0.261 ms         2679
            -OpenCV_BottomHat2D_Constant_Padding/1                  0.262 ms        0.262 ms         2672
            -OpenCV_MorphGrad2D_Constant_Padding/1                  0.254 ms        0.254 ms         2751
            -OpenCV_Dilate2D_Constant_Padding/1                     0.136 ms        0.136 ms         5094
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html deleted file mode 100644 index 10faaad4..00000000 --- a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ /dev/null @@ -1,95 +0,0 @@ - - - -

            imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:54:34 UTC

            -

            AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            - - - - - - - - - - - - - - - - - - - - - - -
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/121.521.531
            MLIR_Conv2D/166.766.711
            Buddy_Conv2D/12.242.24312
            Buddy_Corr2D_Constant_Padding/14.674.67150
            OpenCV_Filter2D_Constant_Padding/18.618.6181
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,847
            Buddy_Resize2D_Bilinear_Interpolation/10.2610.2612,686
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006650.00665105,200
            OpenCV_Resize2D_Bilinear_Interpolation/10.01410.014149,717
            Buddy_Erosion2D_Constant_Padding/10.2130.2133,275
            Buddy_Dilation2D_Constant_Padding/10.2140.2143,279
            Buddy_Opening2D_Constant_Padding/10.310.312,249
            Buddy_Closing2D_Constant_Padding/10.3140.3142,267
            Buddy_TopHat2D_Constant_Padding/10.7890.789827
            Buddy_BottomHat2D_Constant_Padding/10.7630.763845
            OpenCV_Erode2D_Constant_Padding/10.1350.1355,188
            OpenCV_Opening2D_Constant_Padding/10.2290.2293,054
            OpenCV_Closing2D_Constant_Padding/10.2290.2293,052
            OpenCV_TopHat2D_Constant_Padding/10.2620.2622,667
            OpenCV_BottomHat2D_Constant_Padding/10.2620.2622,674
            OpenCV_MorphGrad2D_Constant_Padding/10.2540.2542,759
            OpenCV_Dilate2D_Constant_Padding/10.1370.1375,114
            -
            Console output -
            2025-06-01T10:07:52+00:00
            -Running ./bin/image-processing-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.02, 1.19, 1.96
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ---------------------------------------------------------------------------------------------
            -Benchmark                                                  Time             CPU   Iterations
            ---------------------------------------------------------------------------------------------
            -Eigen_Convolve2D/1                                      21.5 ms         21.5 ms           31
            -MLIR_Conv2D/1                                           66.7 ms         66.7 ms           11
            -Buddy_Conv2D/1                                          2.24 ms         2.24 ms          312
            -Buddy_Corr2D_Constant_Padding/1                         4.67 ms         4.67 ms          150
            -OpenCV_Filter2D_Constant_Padding/1                      8.61 ms         8.61 ms           81
            -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4847
            -Buddy_Resize2D_Bilinear_Interpolation/1                0.261 ms        0.261 ms         2686
            -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       105200
            -OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49717
            -Buddy_Erosion2D_Constant_Padding/1                     0.213 ms        0.213 ms         3275
            -Buddy_Dilation2D_Constant_Padding/1                    0.214 ms        0.214 ms         3279
            -Buddy_Opening2D_Constant_Padding/1                     0.310 ms        0.310 ms         2249
            -Buddy_Closing2D_Constant_Padding/1                     0.314 ms        0.314 ms         2267
            -Buddy_TopHat2D_Constant_Padding/1                      0.789 ms        0.789 ms          827
            -Buddy_BottomHat2D_Constant_Padding/1                   0.763 ms        0.763 ms          845
            -OpenCV_Erode2D_Constant_Padding/1                      0.135 ms        0.135 ms         5188
            -OpenCV_Opening2D_Constant_Padding/1                    0.229 ms        0.229 ms         3054
            -OpenCV_Closing2D_Constant_Padding/1                    0.229 ms        0.229 ms         3052
            -OpenCV_TopHat2D_Constant_Padding/1                     0.262 ms        0.262 ms         2667
            -OpenCV_BottomHat2D_Constant_Padding/1                  0.262 ms        0.262 ms         2674
            -OpenCV_MorphGrad2D_Constant_Padding/1                  0.254 ms        0.254 ms         2759
            -OpenCV_Dilate2D_Constant_Padding/1                     0.137 ms        0.137 ms         5114
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html deleted file mode 100644 index e4137c5f..00000000 --- a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ /dev/null @@ -1,95 +0,0 @@ - - - -

            imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:54:34 UTC

            -

            AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            - - - - - - - - - - - - - - - - - - - - - - -
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/121.621.632
            MLIR_Conv2D/166.766.710
            Buddy_Conv2D/12.342.34299
            Buddy_Corr2D_Constant_Padding/14.674.67150
            OpenCV_Filter2D_Constant_Padding/18.68.681
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,853
            Buddy_Resize2D_Bilinear_Interpolation/10.260.262,693
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006660.00666105,142
            OpenCV_Resize2D_Bilinear_Interpolation/10.0140.01450,003
            Buddy_Erosion2D_Constant_Padding/10.2140.2143,229
            Buddy_Dilation2D_Constant_Padding/10.2170.2173,262
            Buddy_Opening2D_Constant_Padding/10.3080.3082,262
            Buddy_Closing2D_Constant_Padding/10.310.312,236
            Buddy_TopHat2D_Constant_Padding/10.7770.777855
            Buddy_BottomHat2D_Constant_Padding/10.7960.796826
            OpenCV_Erode2D_Constant_Padding/10.1360.1365,125
            OpenCV_Opening2D_Constant_Padding/10.2270.2273,079
            OpenCV_Closing2D_Constant_Padding/10.2260.2263,097
            OpenCV_TopHat2D_Constant_Padding/10.2610.2612,680
            OpenCV_BottomHat2D_Constant_Padding/10.260.262,694
            OpenCV_MorphGrad2D_Constant_Padding/10.2530.2532,766
            OpenCV_Dilate2D_Constant_Padding/10.140.144,993
            -
            Console output -
            2025-06-01T10:08:16+00:00
            -Running ./bin/image-processing-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.02, 1.17, 1.93
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ---------------------------------------------------------------------------------------------
            -Benchmark                                                  Time             CPU   Iterations
            ---------------------------------------------------------------------------------------------
            -Eigen_Convolve2D/1                                      21.6 ms         21.6 ms           32
            -MLIR_Conv2D/1                                           66.7 ms         66.7 ms           10
            -Buddy_Conv2D/1                                          2.34 ms         2.34 ms          299
            -Buddy_Corr2D_Constant_Padding/1                         4.67 ms         4.67 ms          150
            -OpenCV_Filter2D_Constant_Padding/1                      8.60 ms         8.60 ms           81
            -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4853
            -Buddy_Resize2D_Bilinear_Interpolation/1                0.260 ms        0.260 ms         2693
            -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       105142
            -OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        50003
            -Buddy_Erosion2D_Constant_Padding/1                     0.214 ms        0.214 ms         3229
            -Buddy_Dilation2D_Constant_Padding/1                    0.217 ms        0.217 ms         3262
            -Buddy_Opening2D_Constant_Padding/1                     0.308 ms        0.308 ms         2262
            -Buddy_Closing2D_Constant_Padding/1                     0.310 ms        0.310 ms         2236
            -Buddy_TopHat2D_Constant_Padding/1                      0.777 ms        0.777 ms          855
            -Buddy_BottomHat2D_Constant_Padding/1                   0.796 ms        0.796 ms          826
            -OpenCV_Erode2D_Constant_Padding/1                      0.136 ms        0.136 ms         5125
            -OpenCV_Opening2D_Constant_Padding/1                    0.227 ms        0.227 ms         3079
            -OpenCV_Closing2D_Constant_Padding/1                    0.226 ms        0.226 ms         3097
            -OpenCV_TopHat2D_Constant_Padding/1                     0.261 ms        0.261 ms         2680
            -OpenCV_BottomHat2D_Constant_Padding/1                  0.260 ms        0.260 ms         2694
            -OpenCV_MorphGrad2D_Constant_Padding/1                  0.253 ms        0.253 ms         2766
            -OpenCV_Dilate2D_Constant_Padding/1                     0.140 ms        0.140 ms         4993
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html deleted file mode 100644 index 2310ca2d..00000000 --- a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ /dev/null @@ -1,95 +0,0 @@ - - - -

            imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:54:34 UTC

            -

            AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            - - - - - - - - - - - - - - - - - - - - - - -
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/134.234.221
            MLIR_Conv2D/11191196
            Buddy_Conv2D/13.913.91179
            Buddy_Corr2D_Constant_Padding/17.797.7990
            OpenCV_Filter2D_Constant_Padding/15.895.89119
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1420.1424,837
            Buddy_Resize2D_Bilinear_Interpolation/10.260.262,692
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006650.00665105,099
            OpenCV_Resize2D_Bilinear_Interpolation/10.01420.014249,521
            Buddy_Erosion2D_Constant_Padding/10.2140.2143,257
            Buddy_Dilation2D_Constant_Padding/10.2160.2153,222
            Buddy_Opening2D_Constant_Padding/10.3290.3292,228
            Buddy_Closing2D_Constant_Padding/10.3140.3132,221
            Buddy_TopHat2D_Constant_Padding/10.7890.789845
            Buddy_BottomHat2D_Constant_Padding/10.7930.793825
            OpenCV_Erode2D_Constant_Padding/10.1370.1375,117
            OpenCV_Opening2D_Constant_Padding/10.220.223,176
            OpenCV_Closing2D_Constant_Padding/10.220.223,179
            OpenCV_TopHat2D_Constant_Padding/10.2540.2542,758
            OpenCV_BottomHat2D_Constant_Padding/10.2550.2552,740
            OpenCV_MorphGrad2D_Constant_Padding/10.2510.2512,779
            OpenCV_Dilate2D_Constant_Padding/10.1350.1355,176
            -
            Console output -
            2025-06-01T10:08:40+00:00
            -Running ./bin/image-processing-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.01, 1.16, 1.90
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ---------------------------------------------------------------------------------------------
            -Benchmark                                                  Time             CPU   Iterations
            ---------------------------------------------------------------------------------------------
            -Eigen_Convolve2D/1                                      34.2 ms         34.2 ms           21
            -MLIR_Conv2D/1                                            119 ms          119 ms            6
            -Buddy_Conv2D/1                                          3.91 ms         3.91 ms          179
            -Buddy_Corr2D_Constant_Padding/1                         7.79 ms         7.79 ms           90
            -OpenCV_Filter2D_Constant_Padding/1                      5.89 ms         5.89 ms          119
            -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.142 ms        0.142 ms         4837
            -Buddy_Resize2D_Bilinear_Interpolation/1                0.260 ms        0.260 ms         2692
            -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       105099
            -OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49521
            -Buddy_Erosion2D_Constant_Padding/1                     0.214 ms        0.214 ms         3257
            -Buddy_Dilation2D_Constant_Padding/1                    0.216 ms        0.215 ms         3222
            -Buddy_Opening2D_Constant_Padding/1                     0.329 ms        0.329 ms         2228
            -Buddy_Closing2D_Constant_Padding/1                     0.314 ms        0.313 ms         2221
            -Buddy_TopHat2D_Constant_Padding/1                      0.789 ms        0.789 ms          845
            -Buddy_BottomHat2D_Constant_Padding/1                   0.793 ms        0.793 ms          825
            -OpenCV_Erode2D_Constant_Padding/1                      0.137 ms        0.137 ms         5117
            -OpenCV_Opening2D_Constant_Padding/1                    0.220 ms        0.220 ms         3176
            -OpenCV_Closing2D_Constant_Padding/1                    0.220 ms        0.220 ms         3179
            -OpenCV_TopHat2D_Constant_Padding/1                     0.254 ms        0.254 ms         2758
            -OpenCV_BottomHat2D_Constant_Padding/1                  0.255 ms        0.255 ms         2740
            -OpenCV_MorphGrad2D_Constant_Padding/1                  0.251 ms        0.251 ms         2779
            -OpenCV_Dilate2D_Constant_Padding/1                     0.135 ms        0.135 ms         5176
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html deleted file mode 100644 index 1fe16fe8..00000000 --- a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ /dev/null @@ -1,95 +0,0 @@ - - - -

            imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:54:34 UTC

            -

            AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            - - - - - - - - - - - - - - - - - - - - - - -
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/134.334.320
            MLIR_Conv2D/11191196
            Buddy_Conv2D/13.983.98176
            Buddy_Corr2D_Constant_Padding/17.87.890
            OpenCV_Filter2D_Constant_Padding/15.895.89119
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,830
            Buddy_Resize2D_Bilinear_Interpolation/10.2610.2612,690
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006660.00666105,110
            OpenCV_Resize2D_Bilinear_Interpolation/10.01420.014249,449
            Buddy_Erosion2D_Constant_Padding/10.2140.2143,196
            Buddy_Dilation2D_Constant_Padding/10.2140.2143,263
            Buddy_Opening2D_Constant_Padding/10.3130.3132,208
            Buddy_Closing2D_Constant_Padding/10.3270.3272,187
            Buddy_TopHat2D_Constant_Padding/10.8030.803835
            Buddy_BottomHat2D_Constant_Padding/10.7980.798832
            OpenCV_Erode2D_Constant_Padding/10.1380.1385,076
            OpenCV_Opening2D_Constant_Padding/10.2290.2293,051
            OpenCV_Closing2D_Constant_Padding/10.230.233,037
            OpenCV_TopHat2D_Constant_Padding/10.2630.2632,671
            OpenCV_BottomHat2D_Constant_Padding/10.2630.2632,661
            OpenCV_MorphGrad2D_Constant_Padding/10.2570.2572,719
            OpenCV_Dilate2D_Constant_Padding/10.1360.1365,132
            -
            Console output -
            2025-06-01T10:09:04+00:00
            -Running ./bin/image-processing-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.00, 1.14, 1.88
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ---------------------------------------------------------------------------------------------
            -Benchmark                                                  Time             CPU   Iterations
            ---------------------------------------------------------------------------------------------
            -Eigen_Convolve2D/1                                      34.3 ms         34.3 ms           20
            -MLIR_Conv2D/1                                            119 ms          119 ms            6
            -Buddy_Conv2D/1                                          3.98 ms         3.98 ms          176
            -Buddy_Corr2D_Constant_Padding/1                         7.80 ms         7.80 ms           90
            -OpenCV_Filter2D_Constant_Padding/1                      5.89 ms         5.89 ms          119
            -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4830
            -Buddy_Resize2D_Bilinear_Interpolation/1                0.261 ms        0.261 ms         2690
            -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       105110
            -OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49449
            -Buddy_Erosion2D_Constant_Padding/1                     0.214 ms        0.214 ms         3196
            -Buddy_Dilation2D_Constant_Padding/1                    0.214 ms        0.214 ms         3263
            -Buddy_Opening2D_Constant_Padding/1                     0.313 ms        0.313 ms         2208
            -Buddy_Closing2D_Constant_Padding/1                     0.327 ms        0.327 ms         2187
            -Buddy_TopHat2D_Constant_Padding/1                      0.803 ms        0.803 ms          835
            -Buddy_BottomHat2D_Constant_Padding/1                   0.798 ms        0.798 ms          832
            -OpenCV_Erode2D_Constant_Padding/1                      0.138 ms        0.138 ms         5076
            -OpenCV_Opening2D_Constant_Padding/1                    0.229 ms        0.229 ms         3051
            -OpenCV_Closing2D_Constant_Padding/1                    0.230 ms        0.230 ms         3037
            -OpenCV_TopHat2D_Constant_Padding/1                     0.263 ms        0.263 ms         2671
            -OpenCV_BottomHat2D_Constant_Padding/1                  0.263 ms        0.263 ms         2661
            -OpenCV_MorphGrad2D_Constant_Padding/1                  0.257 ms        0.257 ms         2719
            -OpenCV_Dilate2D_Constant_Padding/1                     0.136 ms        0.136 ms         5132
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html deleted file mode 100644 index b53a109b..00000000 --- a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ /dev/null @@ -1,95 +0,0 @@ - - - -

            imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:54:34 UTC

            -

            SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            - - - - - - - - - - - - - - - - - - - - - - -
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.744.74148
            MLIR_Conv2D/17.27.297
            Buddy_Conv2D/10.7050.705994
            Buddy_Corr2D_Constant_Padding/11.071.07652
            OpenCV_Filter2D_Constant_Padding/11.861.86376
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,854
            Buddy_Resize2D_Bilinear_Interpolation/10.260.262,692
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006660.00666105,153
            OpenCV_Resize2D_Bilinear_Interpolation/10.0140.01449,887
            Buddy_Erosion2D_Constant_Padding/10.2160.2163,272
            Buddy_Dilation2D_Constant_Padding/10.2150.2153,249
            Buddy_Opening2D_Constant_Padding/10.3110.3112,259
            Buddy_Closing2D_Constant_Padding/10.3070.3072,229
            Buddy_TopHat2D_Constant_Padding/10.7770.777858
            Buddy_BottomHat2D_Constant_Padding/10.7670.767831
            OpenCV_Erode2D_Constant_Padding/10.1360.1365,114
            OpenCV_Opening2D_Constant_Padding/10.2230.2233,131
            OpenCV_Closing2D_Constant_Padding/10.2220.2223,149
            OpenCV_TopHat2D_Constant_Padding/10.2550.2552,744
            OpenCV_BottomHat2D_Constant_Padding/10.2560.2562,738
            OpenCV_MorphGrad2D_Constant_Padding/10.2480.2482,822
            OpenCV_Dilate2D_Constant_Padding/10.1370.1375,112
            -
            Console output -
            2025-06-01T10:03:44+00:00
            -Running ./bin/image-processing-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.01, 1.35, 2.23
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ---------------------------------------------------------------------------------------------
            -Benchmark                                                  Time             CPU   Iterations
            ---------------------------------------------------------------------------------------------
            -Eigen_Convolve2D/1                                      4.74 ms         4.74 ms          148
            -MLIR_Conv2D/1                                           7.20 ms         7.20 ms           97
            -Buddy_Conv2D/1                                         0.705 ms        0.705 ms          994
            -Buddy_Corr2D_Constant_Padding/1                         1.07 ms         1.07 ms          652
            -OpenCV_Filter2D_Constant_Padding/1                      1.86 ms         1.86 ms          376
            -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4854
            -Buddy_Resize2D_Bilinear_Interpolation/1                0.260 ms        0.260 ms         2692
            -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       105153
            -OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49887
            -Buddy_Erosion2D_Constant_Padding/1                     0.216 ms        0.216 ms         3272
            -Buddy_Dilation2D_Constant_Padding/1                    0.215 ms        0.215 ms         3249
            -Buddy_Opening2D_Constant_Padding/1                     0.311 ms        0.311 ms         2259
            -Buddy_Closing2D_Constant_Padding/1                     0.307 ms        0.307 ms         2229
            -Buddy_TopHat2D_Constant_Padding/1                      0.777 ms        0.777 ms          858
            -Buddy_BottomHat2D_Constant_Padding/1                   0.767 ms        0.767 ms          831
            -OpenCV_Erode2D_Constant_Padding/1                      0.136 ms        0.136 ms         5114
            -OpenCV_Opening2D_Constant_Padding/1                    0.223 ms        0.223 ms         3131
            -OpenCV_Closing2D_Constant_Padding/1                    0.222 ms        0.222 ms         3149
            -OpenCV_TopHat2D_Constant_Padding/1                     0.255 ms        0.255 ms         2744
            -OpenCV_BottomHat2D_Constant_Padding/1                  0.256 ms        0.256 ms         2738
            -OpenCV_MorphGrad2D_Constant_Padding/1                  0.248 ms        0.248 ms         2822
            -OpenCV_Dilate2D_Constant_Padding/1                     0.137 ms        0.137 ms         5112
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html deleted file mode 100644 index 00696aaa..00000000 --- a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ /dev/null @@ -1,95 +0,0 @@ - - - -

            imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:54:34 UTC

            -

            SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            - - - - - - - - - - - - - - - - - - - - - - -
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.884.88144
            MLIR_Conv2D/17.217.2197
            Buddy_Conv2D/10.7070.707988
            Buddy_Corr2D_Constant_Padding/11.051.05668
            OpenCV_Filter2D_Constant_Padding/11.861.86376
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,847
            Buddy_Resize2D_Bilinear_Interpolation/10.2610.2612,676
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006660.00666104,914
            OpenCV_Resize2D_Bilinear_Interpolation/10.0140.01449,862
            Buddy_Erosion2D_Constant_Padding/10.2130.2133,188
            Buddy_Dilation2D_Constant_Padding/10.2160.2163,259
            Buddy_Opening2D_Constant_Padding/10.3170.3172,184
            Buddy_Closing2D_Constant_Padding/10.3140.3142,136
            Buddy_TopHat2D_Constant_Padding/10.7860.786814
            Buddy_BottomHat2D_Constant_Padding/10.7990.799847
            OpenCV_Erode2D_Constant_Padding/10.1390.1395,040
            OpenCV_Opening2D_Constant_Padding/10.2210.2213,163
            OpenCV_Closing2D_Constant_Padding/10.2190.2193,197
            OpenCV_TopHat2D_Constant_Padding/10.2550.2552,741
            OpenCV_BottomHat2D_Constant_Padding/10.2560.2562,735
            OpenCV_MorphGrad2D_Constant_Padding/10.2480.2482,817
            OpenCV_Dilate2D_Constant_Padding/10.1370.1375,127
            -
            Console output -
            2025-06-01T10:04:08+00:00
            -Running ./bin/image-processing-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.01, 1.32, 2.20
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ---------------------------------------------------------------------------------------------
            -Benchmark                                                  Time             CPU   Iterations
            ---------------------------------------------------------------------------------------------
            -Eigen_Convolve2D/1                                      4.88 ms         4.88 ms          144
            -MLIR_Conv2D/1                                           7.21 ms         7.21 ms           97
            -Buddy_Conv2D/1                                         0.707 ms        0.707 ms          988
            -Buddy_Corr2D_Constant_Padding/1                         1.05 ms         1.05 ms          668
            -OpenCV_Filter2D_Constant_Padding/1                      1.86 ms         1.86 ms          376
            -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4847
            -Buddy_Resize2D_Bilinear_Interpolation/1                0.261 ms        0.261 ms         2676
            -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       104914
            -OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49862
            -Buddy_Erosion2D_Constant_Padding/1                     0.213 ms        0.213 ms         3188
            -Buddy_Dilation2D_Constant_Padding/1                    0.216 ms        0.216 ms         3259
            -Buddy_Opening2D_Constant_Padding/1                     0.317 ms        0.317 ms         2184
            -Buddy_Closing2D_Constant_Padding/1                     0.314 ms        0.314 ms         2136
            -Buddy_TopHat2D_Constant_Padding/1                      0.786 ms        0.786 ms          814
            -Buddy_BottomHat2D_Constant_Padding/1                   0.799 ms        0.799 ms          847
            -OpenCV_Erode2D_Constant_Padding/1                      0.139 ms        0.139 ms         5040
            -OpenCV_Opening2D_Constant_Padding/1                    0.221 ms        0.221 ms         3163
            -OpenCV_Closing2D_Constant_Padding/1                    0.219 ms        0.219 ms         3197
            -OpenCV_TopHat2D_Constant_Padding/1                     0.255 ms        0.255 ms         2741
            -OpenCV_BottomHat2D_Constant_Padding/1                  0.256 ms        0.256 ms         2735
            -OpenCV_MorphGrad2D_Constant_Padding/1                  0.248 ms        0.248 ms         2817
            -OpenCV_Dilate2D_Constant_Padding/1                     0.137 ms        0.137 ms         5127
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html deleted file mode 100644 index 01d84f2f..00000000 --- a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ /dev/null @@ -1,95 +0,0 @@ - - - -

            imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:54:34 UTC

            -

            SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            - - - - - - - - - - - - - - - - - - - - - - -
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.511.560
            MLIR_Conv2D/129.129.124
            Buddy_Conv2D/12.042.04343
            Buddy_Corr2D_Constant_Padding/11.741.74400
            OpenCV_Filter2D_Constant_Padding/12.682.68261
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,858
            Buddy_Resize2D_Bilinear_Interpolation/10.2610.2612,687
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006670.00667104,992
            OpenCV_Resize2D_Bilinear_Interpolation/10.0140.01449,870
            Buddy_Erosion2D_Constant_Padding/10.2160.2163,259
            Buddy_Dilation2D_Constant_Padding/10.2160.2163,239
            Buddy_Opening2D_Constant_Padding/10.320.322,223
            Buddy_Closing2D_Constant_Padding/10.3080.3082,211
            Buddy_TopHat2D_Constant_Padding/10.7910.791836
            Buddy_BottomHat2D_Constant_Padding/10.8060.805841
            OpenCV_Erode2D_Constant_Padding/10.1380.1385,076
            OpenCV_Opening2D_Constant_Padding/10.2270.2273,086
            OpenCV_Closing2D_Constant_Padding/10.2250.2253,114
            OpenCV_TopHat2D_Constant_Padding/10.2640.2642,653
            OpenCV_BottomHat2D_Constant_Padding/10.2620.2622,674
            OpenCV_MorphGrad2D_Constant_Padding/10.2550.2552,741
            OpenCV_Dilate2D_Constant_Padding/10.1380.1385,067
            -
            Console output -
            2025-06-01T10:04:31+00:00
            -Running ./bin/image-processing-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.00, 1.30, 2.17
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ---------------------------------------------------------------------------------------------
            -Benchmark                                                  Time             CPU   Iterations
            ---------------------------------------------------------------------------------------------
            -Eigen_Convolve2D/1                                      11.5 ms         11.5 ms           60
            -MLIR_Conv2D/1                                           29.1 ms         29.1 ms           24
            -Buddy_Conv2D/1                                          2.04 ms         2.04 ms          343
            -Buddy_Corr2D_Constant_Padding/1                         1.74 ms         1.74 ms          400
            -OpenCV_Filter2D_Constant_Padding/1                      2.68 ms         2.68 ms          261
            -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4858
            -Buddy_Resize2D_Bilinear_Interpolation/1                0.261 ms        0.261 ms         2687
            -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       104992
            -OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49870
            -Buddy_Erosion2D_Constant_Padding/1                     0.216 ms        0.216 ms         3259
            -Buddy_Dilation2D_Constant_Padding/1                    0.216 ms        0.216 ms         3239
            -Buddy_Opening2D_Constant_Padding/1                     0.320 ms        0.320 ms         2223
            -Buddy_Closing2D_Constant_Padding/1                     0.308 ms        0.308 ms         2211
            -Buddy_TopHat2D_Constant_Padding/1                      0.791 ms        0.791 ms          836
            -Buddy_BottomHat2D_Constant_Padding/1                   0.806 ms        0.805 ms          841
            -OpenCV_Erode2D_Constant_Padding/1                      0.138 ms        0.138 ms         5076
            -OpenCV_Opening2D_Constant_Padding/1                    0.227 ms        0.227 ms         3086
            -OpenCV_Closing2D_Constant_Padding/1                    0.225 ms        0.225 ms         3114
            -OpenCV_TopHat2D_Constant_Padding/1                     0.264 ms        0.264 ms         2653
            -OpenCV_BottomHat2D_Constant_Padding/1                  0.262 ms        0.262 ms         2674
            -OpenCV_MorphGrad2D_Constant_Padding/1                  0.255 ms        0.255 ms         2741
            -OpenCV_Dilate2D_Constant_Padding/1                     0.138 ms        0.138 ms         5067
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html deleted file mode 100644 index bb99d217..00000000 --- a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ /dev/null @@ -1,95 +0,0 @@ - - - -

            imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:54:34 UTC

            -

            SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            - - - - - - - - - - - - - - - - - - - - - - -
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.611.661
            MLIR_Conv2D/1292924
            Buddy_Conv2D/12.082.08337
            Buddy_Corr2D_Constant_Padding/11.751.75399
            OpenCV_Filter2D_Constant_Padding/12.682.68261
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,856
            Buddy_Resize2D_Bilinear_Interpolation/10.2610.2612,688
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006690.00669104,682
            OpenCV_Resize2D_Bilinear_Interpolation/10.01410.014149,744
            Buddy_Erosion2D_Constant_Padding/10.2190.2193,240
            Buddy_Dilation2D_Constant_Padding/10.2150.2153,213
            Buddy_Opening2D_Constant_Padding/10.3080.3082,240
            Buddy_Closing2D_Constant_Padding/10.3090.3092,269
            Buddy_TopHat2D_Constant_Padding/10.820.82841
            Buddy_BottomHat2D_Constant_Padding/10.80.8846
            OpenCV_Erode2D_Constant_Padding/10.1380.1385,072
            OpenCV_Opening2D_Constant_Padding/10.2230.2233,139
            OpenCV_Closing2D_Constant_Padding/10.2280.2283,074
            OpenCV_TopHat2D_Constant_Padding/10.2610.2612,680
            OpenCV_BottomHat2D_Constant_Padding/10.2620.2622,676
            OpenCV_MorphGrad2D_Constant_Padding/10.2540.2542,755
            OpenCV_Dilate2D_Constant_Padding/10.1350.1355,189
            -
            Console output -
            2025-06-01T10:04:55+00:00
            -Running ./bin/image-processing-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.06, 1.29, 2.15
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ---------------------------------------------------------------------------------------------
            -Benchmark                                                  Time             CPU   Iterations
            ---------------------------------------------------------------------------------------------
            -Eigen_Convolve2D/1                                      11.6 ms         11.6 ms           61
            -MLIR_Conv2D/1                                           29.0 ms         29.0 ms           24
            -Buddy_Conv2D/1                                          2.08 ms         2.08 ms          337
            -Buddy_Corr2D_Constant_Padding/1                         1.75 ms         1.75 ms          399
            -OpenCV_Filter2D_Constant_Padding/1                      2.68 ms         2.68 ms          261
            -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4856
            -Buddy_Resize2D_Bilinear_Interpolation/1                0.261 ms        0.261 ms         2688
            -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       104682
            -OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49744
            -Buddy_Erosion2D_Constant_Padding/1                     0.219 ms        0.219 ms         3240
            -Buddy_Dilation2D_Constant_Padding/1                    0.215 ms        0.215 ms         3213
            -Buddy_Opening2D_Constant_Padding/1                     0.308 ms        0.308 ms         2240
            -Buddy_Closing2D_Constant_Padding/1                     0.309 ms        0.309 ms         2269
            -Buddy_TopHat2D_Constant_Padding/1                      0.820 ms        0.820 ms          841
            -Buddy_BottomHat2D_Constant_Padding/1                   0.800 ms        0.800 ms          846
            -OpenCV_Erode2D_Constant_Padding/1                      0.138 ms        0.138 ms         5072
            -OpenCV_Opening2D_Constant_Padding/1                    0.223 ms        0.223 ms         3139
            -OpenCV_Closing2D_Constant_Padding/1                    0.228 ms        0.228 ms         3074
            -OpenCV_TopHat2D_Constant_Padding/1                     0.261 ms        0.261 ms         2680
            -OpenCV_BottomHat2D_Constant_Padding/1                  0.262 ms        0.262 ms         2676
            -OpenCV_MorphGrad2D_Constant_Padding/1                  0.254 ms        0.254 ms         2755
            -OpenCV_Dilate2D_Constant_Padding/1                     0.135 ms        0.135 ms         5189
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html deleted file mode 100644 index b51ee6f8..00000000 --- a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ /dev/null @@ -1,95 +0,0 @@ - - - -

            imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:54:34 UTC

            -

            SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            - - - - - - - - - - - - - - - - - - - - - - -
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/15.035.03139
            MLIR_Conv2D/17.387.3895
            Buddy_Conv2D/10.5220.5211,363
            Buddy_Corr2D_Constant_Padding/10.8140.814865
            OpenCV_Filter2D_Constant_Padding/11.281.28548
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1460.1464,815
            Buddy_Resize2D_Bilinear_Interpolation/10.2670.2672,628
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006810.00681102,923
            OpenCV_Resize2D_Bilinear_Interpolation/10.01430.014348,919
            Buddy_Erosion2D_Constant_Padding/10.2220.2223,130
            Buddy_Dilation2D_Constant_Padding/10.220.223,158
            Buddy_Opening2D_Constant_Padding/10.3340.3342,177
            Buddy_Closing2D_Constant_Padding/10.3380.3382,114
            Buddy_TopHat2D_Constant_Padding/10.8930.893734
            Buddy_BottomHat2D_Constant_Padding/10.8990.899761
            OpenCV_Erode2D_Constant_Padding/10.1390.1395,020
            OpenCV_Opening2D_Constant_Padding/10.2170.2173,210
            OpenCV_Closing2D_Constant_Padding/10.220.223,180
            OpenCV_TopHat2D_Constant_Padding/10.2550.2552,639
            OpenCV_BottomHat2D_Constant_Padding/10.2560.2562,732
            OpenCV_MorphGrad2D_Constant_Padding/10.2470.2472,827
            OpenCV_Dilate2D_Constant_Padding/10.1360.1365,123
            -
            Console output -
            2025-06-01T09:59:45+00:00
            -Running ./bin/image-processing-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.45, 1.75, 2.60
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ---------------------------------------------------------------------------------------------
            -Benchmark                                                  Time             CPU   Iterations
            ---------------------------------------------------------------------------------------------
            -Eigen_Convolve2D/1                                      5.03 ms         5.03 ms          139
            -MLIR_Conv2D/1                                           7.38 ms         7.38 ms           95
            -Buddy_Conv2D/1                                         0.522 ms        0.521 ms         1363
            -Buddy_Corr2D_Constant_Padding/1                        0.814 ms        0.814 ms          865
            -OpenCV_Filter2D_Constant_Padding/1                      1.28 ms         1.28 ms          548
            -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.146 ms        0.146 ms         4815
            -Buddy_Resize2D_Bilinear_Interpolation/1                0.267 ms        0.267 ms         2628
            -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       102923
            -OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        48919
            -Buddy_Erosion2D_Constant_Padding/1                     0.222 ms        0.222 ms         3130
            -Buddy_Dilation2D_Constant_Padding/1                    0.220 ms        0.220 ms         3158
            -Buddy_Opening2D_Constant_Padding/1                     0.334 ms        0.334 ms         2177
            -Buddy_Closing2D_Constant_Padding/1                     0.338 ms        0.338 ms         2114
            -Buddy_TopHat2D_Constant_Padding/1                      0.893 ms        0.893 ms          734
            -Buddy_BottomHat2D_Constant_Padding/1                   0.899 ms        0.899 ms          761
            -OpenCV_Erode2D_Constant_Padding/1                      0.139 ms        0.139 ms         5020
            -OpenCV_Opening2D_Constant_Padding/1                    0.217 ms        0.217 ms         3210
            -OpenCV_Closing2D_Constant_Padding/1                    0.220 ms        0.220 ms         3180
            -OpenCV_TopHat2D_Constant_Padding/1                     0.255 ms        0.255 ms         2639
            -OpenCV_BottomHat2D_Constant_Padding/1                  0.256 ms        0.256 ms         2732
            -OpenCV_MorphGrad2D_Constant_Padding/1                  0.247 ms        0.247 ms         2827
            -OpenCV_Dilate2D_Constant_Padding/1                     0.136 ms        0.136 ms         5123
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html deleted file mode 100644 index 9ff6cb52..00000000 --- a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ /dev/null @@ -1,95 +0,0 @@ - - - -

            imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:54:34 UTC

            -

            SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            - - - - - - - - - - - - - - - - - - - - - - -
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.894.89143
            MLIR_Conv2D/17.197.1997
            Buddy_Conv2D/10.5240.5241,337
            Buddy_Corr2D_Constant_Padding/10.7920.792882
            OpenCV_Filter2D_Constant_Padding/11.251.25561
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,818
            Buddy_Resize2D_Bilinear_Interpolation/10.2610.262,683
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006690.00669104,687
            OpenCV_Resize2D_Bilinear_Interpolation/10.0140.01449,857
            Buddy_Erosion2D_Constant_Padding/10.2210.2213,251
            Buddy_Dilation2D_Constant_Padding/10.2140.2143,237
            Buddy_Opening2D_Constant_Padding/10.3230.3232,241
            Buddy_Closing2D_Constant_Padding/10.3080.3082,271
            Buddy_TopHat2D_Constant_Padding/10.8050.805841
            Buddy_BottomHat2D_Constant_Padding/10.8090.809846
            OpenCV_Erode2D_Constant_Padding/10.1360.1365,105
            OpenCV_Opening2D_Constant_Padding/10.2170.2173,219
            OpenCV_Closing2D_Constant_Padding/10.2170.2173,216
            OpenCV_TopHat2D_Constant_Padding/10.2580.2582,710
            OpenCV_BottomHat2D_Constant_Padding/10.2560.2562,740
            OpenCV_MorphGrad2D_Constant_Padding/10.250.252,803
            OpenCV_Dilate2D_Constant_Padding/10.1370.1375,105
            -
            Console output -
            2025-06-01T10:00:09+00:00
            -Running ./bin/image-processing-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.48, 1.73, 2.57
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ---------------------------------------------------------------------------------------------
            -Benchmark                                                  Time             CPU   Iterations
            ---------------------------------------------------------------------------------------------
            -Eigen_Convolve2D/1                                      4.89 ms         4.89 ms          143
            -MLIR_Conv2D/1                                           7.19 ms         7.19 ms           97
            -Buddy_Conv2D/1                                         0.524 ms        0.524 ms         1337
            -Buddy_Corr2D_Constant_Padding/1                        0.792 ms        0.792 ms          882
            -OpenCV_Filter2D_Constant_Padding/1                      1.25 ms         1.25 ms          561
            -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4818
            -Buddy_Resize2D_Bilinear_Interpolation/1                0.261 ms        0.260 ms         2683
            -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       104687
            -OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49857
            -Buddy_Erosion2D_Constant_Padding/1                     0.221 ms        0.221 ms         3251
            -Buddy_Dilation2D_Constant_Padding/1                    0.214 ms        0.214 ms         3237
            -Buddy_Opening2D_Constant_Padding/1                     0.323 ms        0.323 ms         2241
            -Buddy_Closing2D_Constant_Padding/1                     0.308 ms        0.308 ms         2271
            -Buddy_TopHat2D_Constant_Padding/1                      0.805 ms        0.805 ms          841
            -Buddy_BottomHat2D_Constant_Padding/1                   0.809 ms        0.809 ms          846
            -OpenCV_Erode2D_Constant_Padding/1                      0.136 ms        0.136 ms         5105
            -OpenCV_Opening2D_Constant_Padding/1                    0.217 ms        0.217 ms         3219
            -OpenCV_Closing2D_Constant_Padding/1                    0.217 ms        0.217 ms         3216
            -OpenCV_TopHat2D_Constant_Padding/1                     0.258 ms        0.258 ms         2710
            -OpenCV_BottomHat2D_Constant_Padding/1                  0.256 ms        0.256 ms         2740
            -OpenCV_MorphGrad2D_Constant_Padding/1                  0.250 ms        0.250 ms         2803
            -OpenCV_Dilate2D_Constant_Padding/1                     0.137 ms        0.137 ms         5105
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html deleted file mode 100644 index d6181597..00000000 --- a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ /dev/null @@ -1,95 +0,0 @@ - - - -

            imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:54:34 UTC

            -

            SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            - - - - - - - - - - - - - - - - - - - - - - -
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.894.89143
            MLIR_Conv2D/17.197.1997
            Buddy_Conv2D/10.5230.5231,334
            Buddy_Corr2D_Constant_Padding/10.7930.793882
            OpenCV_Filter2D_Constant_Padding/11.251.25561
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,847
            Buddy_Resize2D_Bilinear_Interpolation/10.260.262,679
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006680.00668104,555
            OpenCV_Resize2D_Bilinear_Interpolation/10.0140.01449,894
            Buddy_Erosion2D_Constant_Padding/10.2140.2143,270
            Buddy_Dilation2D_Constant_Padding/10.2140.2143,269
            Buddy_Opening2D_Constant_Padding/10.320.322,113
            Buddy_Closing2D_Constant_Padding/10.3060.3062,219
            Buddy_TopHat2D_Constant_Padding/10.7810.781863
            Buddy_BottomHat2D_Constant_Padding/10.7950.795836
            OpenCV_Erode2D_Constant_Padding/10.1350.1355,175
            OpenCV_Opening2D_Constant_Padding/10.2170.2173,222
            OpenCV_Closing2D_Constant_Padding/10.2220.2223,153
            OpenCV_TopHat2D_Constant_Padding/10.2570.2572,720
            OpenCV_BottomHat2D_Constant_Padding/10.2570.2572,721
            OpenCV_MorphGrad2D_Constant_Padding/10.250.252,805
            OpenCV_Dilate2D_Constant_Padding/10.1370.1375,121
            -
            Console output -
            2025-06-01T10:00:33+00:00
            -Running ./bin/image-processing-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.32, 1.67, 2.52
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ---------------------------------------------------------------------------------------------
            -Benchmark                                                  Time             CPU   Iterations
            ---------------------------------------------------------------------------------------------
            -Eigen_Convolve2D/1                                      4.89 ms         4.89 ms          143
            -MLIR_Conv2D/1                                           7.19 ms         7.19 ms           97
            -Buddy_Conv2D/1                                         0.523 ms        0.523 ms         1334
            -Buddy_Corr2D_Constant_Padding/1                        0.793 ms        0.793 ms          882
            -OpenCV_Filter2D_Constant_Padding/1                      1.25 ms         1.25 ms          561
            -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4847
            -Buddy_Resize2D_Bilinear_Interpolation/1                0.260 ms        0.260 ms         2679
            -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       104555
            -OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49894
            -Buddy_Erosion2D_Constant_Padding/1                     0.214 ms        0.214 ms         3270
            -Buddy_Dilation2D_Constant_Padding/1                    0.214 ms        0.214 ms         3269
            -Buddy_Opening2D_Constant_Padding/1                     0.320 ms        0.320 ms         2113
            -Buddy_Closing2D_Constant_Padding/1                     0.306 ms        0.306 ms         2219
            -Buddy_TopHat2D_Constant_Padding/1                      0.781 ms        0.781 ms          863
            -Buddy_BottomHat2D_Constant_Padding/1                   0.795 ms        0.795 ms          836
            -OpenCV_Erode2D_Constant_Padding/1                      0.135 ms        0.135 ms         5175
            -OpenCV_Opening2D_Constant_Padding/1                    0.217 ms        0.217 ms         3222
            -OpenCV_Closing2D_Constant_Padding/1                    0.222 ms        0.222 ms         3153
            -OpenCV_TopHat2D_Constant_Padding/1                     0.257 ms        0.257 ms         2720
            -OpenCV_BottomHat2D_Constant_Padding/1                  0.257 ms        0.257 ms         2721
            -OpenCV_MorphGrad2D_Constant_Padding/1                  0.250 ms        0.250 ms         2805
            -OpenCV_Dilate2D_Constant_Padding/1                     0.137 ms        0.137 ms         5121
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html deleted file mode 100644 index 412d449c..00000000 --- a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ /dev/null @@ -1,95 +0,0 @@ - - - -

            imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:54:34 UTC

            -

            SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            - - - - - - - - - - - - - - - - - - - - - - -
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/14.874.87144
            MLIR_Conv2D/17.197.1897
            Buddy_Conv2D/10.5260.5261,322
            Buddy_Corr2D_Constant_Padding/10.7930.793885
            OpenCV_Filter2D_Constant_Padding/11.251.25560
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,865
            Buddy_Resize2D_Bilinear_Interpolation/10.260.262,697
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006660.00666105,155
            OpenCV_Resize2D_Bilinear_Interpolation/10.0140.01449,883
            Buddy_Erosion2D_Constant_Padding/10.2140.2143,259
            Buddy_Dilation2D_Constant_Padding/10.2130.2133,259
            Buddy_Opening2D_Constant_Padding/10.3150.3152,258
            Buddy_Closing2D_Constant_Padding/10.3120.3122,255
            Buddy_TopHat2D_Constant_Padding/10.7830.783854
            Buddy_BottomHat2D_Constant_Padding/10.7850.785821
            OpenCV_Erode2D_Constant_Padding/10.1370.1375,119
            OpenCV_Opening2D_Constant_Padding/10.2210.2213,167
            OpenCV_Closing2D_Constant_Padding/10.2230.2233,139
            OpenCV_TopHat2D_Constant_Padding/10.2580.2582,712
            OpenCV_BottomHat2D_Constant_Padding/10.2560.2562,733
            OpenCV_MorphGrad2D_Constant_Padding/10.2490.2492,804
            OpenCV_Dilate2D_Constant_Padding/10.1370.1375,118
            -
            Console output -
            2025-06-01T10:00:56+00:00
            -Running ./bin/image-processing-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.23, 1.63, 2.49
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ---------------------------------------------------------------------------------------------
            -Benchmark                                                  Time             CPU   Iterations
            ---------------------------------------------------------------------------------------------
            -Eigen_Convolve2D/1                                      4.87 ms         4.87 ms          144
            -MLIR_Conv2D/1                                           7.19 ms         7.18 ms           97
            -Buddy_Conv2D/1                                         0.526 ms        0.526 ms         1322
            -Buddy_Corr2D_Constant_Padding/1                        0.793 ms        0.793 ms          885
            -OpenCV_Filter2D_Constant_Padding/1                      1.25 ms         1.25 ms          560
            -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4865
            -Buddy_Resize2D_Bilinear_Interpolation/1                0.260 ms        0.260 ms         2697
            -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       105155
            -OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49883
            -Buddy_Erosion2D_Constant_Padding/1                     0.214 ms        0.214 ms         3259
            -Buddy_Dilation2D_Constant_Padding/1                    0.213 ms        0.213 ms         3259
            -Buddy_Opening2D_Constant_Padding/1                     0.315 ms        0.315 ms         2258
            -Buddy_Closing2D_Constant_Padding/1                     0.312 ms        0.312 ms         2255
            -Buddy_TopHat2D_Constant_Padding/1                      0.783 ms        0.783 ms          854
            -Buddy_BottomHat2D_Constant_Padding/1                   0.785 ms        0.785 ms          821
            -OpenCV_Erode2D_Constant_Padding/1                      0.137 ms        0.137 ms         5119
            -OpenCV_Opening2D_Constant_Padding/1                    0.221 ms        0.221 ms         3167
            -OpenCV_Closing2D_Constant_Padding/1                    0.223 ms        0.223 ms         3139
            -OpenCV_TopHat2D_Constant_Padding/1                     0.258 ms        0.258 ms         2712
            -OpenCV_BottomHat2D_Constant_Padding/1                  0.256 ms        0.256 ms         2733
            -OpenCV_MorphGrad2D_Constant_Padding/1                  0.249 ms        0.249 ms         2804
            -OpenCV_Dilate2D_Constant_Padding/1                     0.137 ms        0.137 ms         5118
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html deleted file mode 100644 index 905cc8e2..00000000 --- a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ /dev/null @@ -1,95 +0,0 @@ - - - -

            imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:54:34 UTC

            -

            SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            - - - - - - - - - - - - - - - - - - - - - - -
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.511.561
            MLIR_Conv2D/128.928.924
            Buddy_Conv2D/13.033.03231
            Buddy_Corr2D_Constant_Padding/12.312.31303
            OpenCV_Filter2D_Constant_Padding/14.114.11171
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1420.1424,848
            Buddy_Resize2D_Bilinear_Interpolation/10.2610.2612,686
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006660.00666105,211
            OpenCV_Resize2D_Bilinear_Interpolation/10.0140.01449,990
            Buddy_Erosion2D_Constant_Padding/10.2140.2143,269
            Buddy_Dilation2D_Constant_Padding/10.2150.2153,239
            Buddy_Opening2D_Constant_Padding/10.3140.3142,276
            Buddy_Closing2D_Constant_Padding/10.3080.3082,257
            Buddy_TopHat2D_Constant_Padding/10.8040.804840
            Buddy_BottomHat2D_Constant_Padding/10.7830.783836
            OpenCV_Erode2D_Constant_Padding/10.1360.1365,131
            OpenCV_Opening2D_Constant_Padding/10.2240.2243,120
            OpenCV_Closing2D_Constant_Padding/10.2270.2273,078
            OpenCV_TopHat2D_Constant_Padding/10.260.262,688
            OpenCV_BottomHat2D_Constant_Padding/10.2590.2592,702
            OpenCV_MorphGrad2D_Constant_Padding/10.2530.2532,764
            OpenCV_Dilate2D_Constant_Padding/10.1390.1395,022
            -
            Console output -
            2025-06-01T10:01:20+00:00
            -Running ./bin/image-processing-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.15, 1.58, 2.45
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ---------------------------------------------------------------------------------------------
            -Benchmark                                                  Time             CPU   Iterations
            ---------------------------------------------------------------------------------------------
            -Eigen_Convolve2D/1                                      11.5 ms         11.5 ms           61
            -MLIR_Conv2D/1                                           28.9 ms         28.9 ms           24
            -Buddy_Conv2D/1                                          3.03 ms         3.03 ms          231
            -Buddy_Corr2D_Constant_Padding/1                         2.31 ms         2.31 ms          303
            -OpenCV_Filter2D_Constant_Padding/1                      4.11 ms         4.11 ms          171
            -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.142 ms        0.142 ms         4848
            -Buddy_Resize2D_Bilinear_Interpolation/1                0.261 ms        0.261 ms         2686
            -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       105211
            -OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49990
            -Buddy_Erosion2D_Constant_Padding/1                     0.214 ms        0.214 ms         3269
            -Buddy_Dilation2D_Constant_Padding/1                    0.215 ms        0.215 ms         3239
            -Buddy_Opening2D_Constant_Padding/1                     0.314 ms        0.314 ms         2276
            -Buddy_Closing2D_Constant_Padding/1                     0.308 ms        0.308 ms         2257
            -Buddy_TopHat2D_Constant_Padding/1                      0.804 ms        0.804 ms          840
            -Buddy_BottomHat2D_Constant_Padding/1                   0.783 ms        0.783 ms          836
            -OpenCV_Erode2D_Constant_Padding/1                      0.136 ms        0.136 ms         5131
            -OpenCV_Opening2D_Constant_Padding/1                    0.224 ms        0.224 ms         3120
            -OpenCV_Closing2D_Constant_Padding/1                    0.227 ms        0.227 ms         3078
            -OpenCV_TopHat2D_Constant_Padding/1                     0.260 ms        0.260 ms         2688
            -OpenCV_BottomHat2D_Constant_Padding/1                  0.259 ms        0.259 ms         2702
            -OpenCV_MorphGrad2D_Constant_Padding/1                  0.253 ms        0.253 ms         2764
            -OpenCV_Dilate2D_Constant_Padding/1                     0.139 ms        0.139 ms         5022
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html deleted file mode 100644 index 85d9b2ed..00000000 --- a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ /dev/null @@ -1,95 +0,0 @@ - - - -

            imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:54:34 UTC

            -

            SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            - - - - - - - - - - - - - - - - - - - - - - -
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/111.611.661
            MLIR_Conv2D/128.728.724
            Buddy_Conv2D/13.033.03231
            Buddy_Corr2D_Constant_Padding/12.312.31302
            OpenCV_Filter2D_Constant_Padding/14.14.1170
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1420.1424,835
            Buddy_Resize2D_Bilinear_Interpolation/10.260.262,693
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006680.00668104,962
            OpenCV_Resize2D_Bilinear_Interpolation/10.0140.01449,843
            Buddy_Erosion2D_Constant_Padding/10.2150.2153,259
            Buddy_Dilation2D_Constant_Padding/10.2140.2143,254
            Buddy_Opening2D_Constant_Padding/10.3070.3072,262
            Buddy_Closing2D_Constant_Padding/10.3190.3192,241
            Buddy_TopHat2D_Constant_Padding/10.7840.784851
            Buddy_BottomHat2D_Constant_Padding/10.7770.777840
            OpenCV_Erode2D_Constant_Padding/10.1350.1355,192
            OpenCV_Opening2D_Constant_Padding/10.2310.2313,030
            OpenCV_Closing2D_Constant_Padding/10.2290.2293,053
            OpenCV_TopHat2D_Constant_Padding/10.2680.2682,609
            OpenCV_BottomHat2D_Constant_Padding/10.2670.2672,624
            OpenCV_MorphGrad2D_Constant_Padding/10.2580.2582,714
            OpenCV_Dilate2D_Constant_Padding/10.1370.1375,119
            -
            Console output -
            2025-06-01T10:01:44+00:00
            -Running ./bin/image-processing-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.10, 1.53, 2.41
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ---------------------------------------------------------------------------------------------
            -Benchmark                                                  Time             CPU   Iterations
            ---------------------------------------------------------------------------------------------
            -Eigen_Convolve2D/1                                      11.6 ms         11.6 ms           61
            -MLIR_Conv2D/1                                           28.7 ms         28.7 ms           24
            -Buddy_Conv2D/1                                          3.03 ms         3.03 ms          231
            -Buddy_Corr2D_Constant_Padding/1                         2.31 ms         2.31 ms          302
            -OpenCV_Filter2D_Constant_Padding/1                      4.10 ms         4.10 ms          170
            -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.142 ms        0.142 ms         4835
            -Buddy_Resize2D_Bilinear_Interpolation/1                0.260 ms        0.260 ms         2693
            -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       104962
            -OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49843
            -Buddy_Erosion2D_Constant_Padding/1                     0.215 ms        0.215 ms         3259
            -Buddy_Dilation2D_Constant_Padding/1                    0.214 ms        0.214 ms         3254
            -Buddy_Opening2D_Constant_Padding/1                     0.307 ms        0.307 ms         2262
            -Buddy_Closing2D_Constant_Padding/1                     0.319 ms        0.319 ms         2241
            -Buddy_TopHat2D_Constant_Padding/1                      0.784 ms        0.784 ms          851
            -Buddy_BottomHat2D_Constant_Padding/1                   0.777 ms        0.777 ms          840
            -OpenCV_Erode2D_Constant_Padding/1                      0.135 ms        0.135 ms         5192
            -OpenCV_Opening2D_Constant_Padding/1                    0.231 ms        0.231 ms         3030
            -OpenCV_Closing2D_Constant_Padding/1                    0.229 ms        0.229 ms         3053
            -OpenCV_TopHat2D_Constant_Padding/1                     0.268 ms        0.268 ms         2609
            -OpenCV_BottomHat2D_Constant_Padding/1                  0.267 ms        0.267 ms         2624
            -OpenCV_MorphGrad2D_Constant_Padding/1                  0.258 ms        0.258 ms         2714
            -OpenCV_Dilate2D_Constant_Padding/1                     0.137 ms        0.137 ms         5119
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html deleted file mode 100644 index cf167376..00000000 --- a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ /dev/null @@ -1,95 +0,0 @@ - - - -

            imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:54:34 UTC

            -

            SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            - - - - - - - - - - - - - - - - - - - - - - -
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/121.621.631
            MLIR_Conv2D/166.766.711
            Buddy_Conv2D/16.126.12114
            Buddy_Corr2D_Constant_Padding/14.654.65151
            OpenCV_Filter2D_Constant_Padding/18.68.681
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,847
            Buddy_Resize2D_Bilinear_Interpolation/10.2610.2612,686
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006660.00666105,138
            OpenCV_Resize2D_Bilinear_Interpolation/10.0140.01449,889
            Buddy_Erosion2D_Constant_Padding/10.2130.2133,257
            Buddy_Dilation2D_Constant_Padding/10.2130.2133,242
            Buddy_Opening2D_Constant_Padding/10.3130.3132,237
            Buddy_Closing2D_Constant_Padding/10.3180.3182,232
            Buddy_TopHat2D_Constant_Padding/10.7750.775853
            Buddy_BottomHat2D_Constant_Padding/10.7880.788846
            OpenCV_Erode2D_Constant_Padding/10.1380.1385,075
            OpenCV_Opening2D_Constant_Padding/10.2320.2323,015
            OpenCV_Closing2D_Constant_Padding/10.2250.2253,114
            OpenCV_TopHat2D_Constant_Padding/10.2640.2642,647
            OpenCV_BottomHat2D_Constant_Padding/10.2620.2622,672
            OpenCV_MorphGrad2D_Constant_Padding/10.2550.2552,749
            OpenCV_Dilate2D_Constant_Padding/10.1370.1375,111
            -
            Console output -
            2025-06-01T10:02:08+00:00
            -Running ./bin/image-processing-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.06, 1.49, 2.37
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ---------------------------------------------------------------------------------------------
            -Benchmark                                                  Time             CPU   Iterations
            ---------------------------------------------------------------------------------------------
            -Eigen_Convolve2D/1                                      21.6 ms         21.6 ms           31
            -MLIR_Conv2D/1                                           66.7 ms         66.7 ms           11
            -Buddy_Conv2D/1                                          6.12 ms         6.12 ms          114
            -Buddy_Corr2D_Constant_Padding/1                         4.65 ms         4.65 ms          151
            -OpenCV_Filter2D_Constant_Padding/1                      8.60 ms         8.60 ms           81
            -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4847
            -Buddy_Resize2D_Bilinear_Interpolation/1                0.261 ms        0.261 ms         2686
            -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       105138
            -OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49889
            -Buddy_Erosion2D_Constant_Padding/1                     0.213 ms        0.213 ms         3257
            -Buddy_Dilation2D_Constant_Padding/1                    0.213 ms        0.213 ms         3242
            -Buddy_Opening2D_Constant_Padding/1                     0.313 ms        0.313 ms         2237
            -Buddy_Closing2D_Constant_Padding/1                     0.318 ms        0.318 ms         2232
            -Buddy_TopHat2D_Constant_Padding/1                      0.775 ms        0.775 ms          853
            -Buddy_BottomHat2D_Constant_Padding/1                   0.788 ms        0.788 ms          846
            -OpenCV_Erode2D_Constant_Padding/1                      0.138 ms        0.138 ms         5075
            -OpenCV_Opening2D_Constant_Padding/1                    0.232 ms        0.232 ms         3015
            -OpenCV_Closing2D_Constant_Padding/1                    0.225 ms        0.225 ms         3114
            -OpenCV_TopHat2D_Constant_Padding/1                     0.264 ms        0.264 ms         2647
            -OpenCV_BottomHat2D_Constant_Padding/1                  0.262 ms        0.262 ms         2672
            -OpenCV_MorphGrad2D_Constant_Padding/1                  0.255 ms        0.255 ms         2749
            -OpenCV_Dilate2D_Constant_Padding/1                     0.137 ms        0.137 ms         5111
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html deleted file mode 100644 index 9dc07381..00000000 --- a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ /dev/null @@ -1,95 +0,0 @@ - - - -

            imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:54:34 UTC

            -

            SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            - - - - - - - - - - - - - - - - - - - - - - -
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/121.521.532
            MLIR_Conv2D/166.666.611
            Buddy_Conv2D/16.136.13114
            Buddy_Corr2D_Constant_Padding/14.654.65151
            OpenCV_Filter2D_Constant_Padding/18.68.681
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,857
            Buddy_Resize2D_Bilinear_Interpolation/10.260.262,693
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006650.00665105,362
            OpenCV_Resize2D_Bilinear_Interpolation/10.0140.01449,959
            Buddy_Erosion2D_Constant_Padding/10.2140.2143,220
            Buddy_Dilation2D_Constant_Padding/10.2150.2153,263
            Buddy_Opening2D_Constant_Padding/10.310.312,246
            Buddy_Closing2D_Constant_Padding/10.310.312,217
            Buddy_TopHat2D_Constant_Padding/10.7780.778828
            Buddy_BottomHat2D_Constant_Padding/10.7930.793833
            OpenCV_Erode2D_Constant_Padding/10.1360.1365,129
            OpenCV_Opening2D_Constant_Padding/10.2260.2263,091
            OpenCV_Closing2D_Constant_Padding/10.2260.2263,096
            OpenCV_TopHat2D_Constant_Padding/10.2610.2612,677
            OpenCV_BottomHat2D_Constant_Padding/10.2610.2612,684
            OpenCV_MorphGrad2D_Constant_Padding/10.2530.2532,763
            OpenCV_Dilate2D_Constant_Padding/10.1360.1365,132
            -
            Console output -
            2025-06-01T10:02:32+00:00
            -Running ./bin/image-processing-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.04, 1.45, 2.33
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ---------------------------------------------------------------------------------------------
            -Benchmark                                                  Time             CPU   Iterations
            ---------------------------------------------------------------------------------------------
            -Eigen_Convolve2D/1                                      21.5 ms         21.5 ms           32
            -MLIR_Conv2D/1                                           66.6 ms         66.6 ms           11
            -Buddy_Conv2D/1                                          6.13 ms         6.13 ms          114
            -Buddy_Corr2D_Constant_Padding/1                         4.65 ms         4.65 ms          151
            -OpenCV_Filter2D_Constant_Padding/1                      8.60 ms         8.60 ms           81
            -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4857
            -Buddy_Resize2D_Bilinear_Interpolation/1                0.260 ms        0.260 ms         2693
            -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       105362
            -OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49959
            -Buddy_Erosion2D_Constant_Padding/1                     0.214 ms        0.214 ms         3220
            -Buddy_Dilation2D_Constant_Padding/1                    0.215 ms        0.215 ms         3263
            -Buddy_Opening2D_Constant_Padding/1                     0.310 ms        0.310 ms         2246
            -Buddy_Closing2D_Constant_Padding/1                     0.310 ms        0.310 ms         2217
            -Buddy_TopHat2D_Constant_Padding/1                      0.778 ms        0.778 ms          828
            -Buddy_BottomHat2D_Constant_Padding/1                   0.793 ms        0.793 ms          833
            -OpenCV_Erode2D_Constant_Padding/1                      0.136 ms        0.136 ms         5129
            -OpenCV_Opening2D_Constant_Padding/1                    0.226 ms        0.226 ms         3091
            -OpenCV_Closing2D_Constant_Padding/1                    0.226 ms        0.226 ms         3096
            -OpenCV_TopHat2D_Constant_Padding/1                     0.261 ms        0.261 ms         2677
            -OpenCV_BottomHat2D_Constant_Padding/1                  0.261 ms        0.261 ms         2684
            -OpenCV_MorphGrad2D_Constant_Padding/1                  0.253 ms        0.253 ms         2763
            -OpenCV_Dilate2D_Constant_Padding/1                     0.136 ms        0.136 ms         5132
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html deleted file mode 100644 index bdd3c9f0..00000000 --- a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.html +++ /dev/null @@ -1,95 +0,0 @@ - - - -

            imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            2025-07-27 17:54:34 UTC

            -

            SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json

            - - - - - - - - - - - - - - - - - - - - - - -
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/141.541.517
            MLIR_Conv2D/11441445
            Buddy_Conv2D/110.510.567
            Buddy_Corr2D_Constant_Padding/17.957.9590
            OpenCV_Filter2D_Constant_Padding/15.895.89120
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,856
            Buddy_Resize2D_Bilinear_Interpolation/10.2610.2612,692
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006660.00666105,165
            OpenCV_Resize2D_Bilinear_Interpolation/10.01420.014249,405
            Buddy_Erosion2D_Constant_Padding/10.2140.2143,218
            Buddy_Dilation2D_Constant_Padding/10.2150.2153,226
            Buddy_Opening2D_Constant_Padding/10.3120.3122,175
            Buddy_Closing2D_Constant_Padding/10.3120.3122,264
            Buddy_TopHat2D_Constant_Padding/10.8210.821843
            Buddy_BottomHat2D_Constant_Padding/10.8180.818844
            OpenCV_Erode2D_Constant_Padding/10.1360.1365,142
            OpenCV_Opening2D_Constant_Padding/10.2210.2213,174
            OpenCV_Closing2D_Constant_Padding/10.2210.2213,164
            OpenCV_TopHat2D_Constant_Padding/10.2560.2562,735
            OpenCV_BottomHat2D_Constant_Padding/10.2580.2582,710
            OpenCV_MorphGrad2D_Constant_Padding/10.2510.2512,787
            OpenCV_Dilate2D_Constant_Padding/10.1370.1375,117
            -
            Console output -
            2025-06-01T10:02:56+00:00
            -Running ./bin/image-processing-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.03, 1.42, 2.30
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ---------------------------------------------------------------------------------------------
            -Benchmark                                                  Time             CPU   Iterations
            ---------------------------------------------------------------------------------------------
            -Eigen_Convolve2D/1                                      41.5 ms         41.5 ms           17
            -MLIR_Conv2D/1                                            144 ms          144 ms            5
            -Buddy_Conv2D/1                                          10.5 ms         10.5 ms           67
            -Buddy_Corr2D_Constant_Padding/1                         7.95 ms         7.95 ms           90
            -OpenCV_Filter2D_Constant_Padding/1                      5.89 ms         5.89 ms          120
            -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4856
            -Buddy_Resize2D_Bilinear_Interpolation/1                0.261 ms        0.261 ms         2692
            -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       105165
            -OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49405
            -Buddy_Erosion2D_Constant_Padding/1                     0.214 ms        0.214 ms         3218
            -Buddy_Dilation2D_Constant_Padding/1                    0.215 ms        0.215 ms         3226
            -Buddy_Opening2D_Constant_Padding/1                     0.312 ms        0.312 ms         2175
            -Buddy_Closing2D_Constant_Padding/1                     0.312 ms        0.312 ms         2264
            -Buddy_TopHat2D_Constant_Padding/1                      0.821 ms        0.821 ms          843
            -Buddy_BottomHat2D_Constant_Padding/1                   0.818 ms        0.818 ms          844
            -OpenCV_Erode2D_Constant_Padding/1                      0.136 ms        0.136 ms         5142
            -OpenCV_Opening2D_Constant_Padding/1                    0.221 ms        0.221 ms         3174
            -OpenCV_Closing2D_Constant_Padding/1                    0.221 ms        0.221 ms         3164
            -OpenCV_TopHat2D_Constant_Padding/1                     0.256 ms        0.256 ms         2735
            -OpenCV_BottomHat2D_Constant_Padding/1                  0.258 ms        0.258 ms         2710
            -OpenCV_MorphGrad2D_Constant_Padding/1                  0.251 ms        0.251 ms         2787
            -OpenCV_Dilate2D_Constant_Padding/1                     0.137 ms        0.137 ms         5117
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html deleted file mode 100644 index e52e0fbd..00000000 --- a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.html +++ /dev/null @@ -1,95 +0,0 @@ - - - -

            imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            2025-07-27 17:54:34 UTC

            -

            SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json

            - - - - - - - - - - - - - - - - - - - - - - -
            NameTime (ms)CPU (ms)Iterations
            Eigen_Convolve2D/134.234.220
            MLIR_Conv2D/11191196
            Buddy_Conv2D/110.510.567
            Buddy_Corr2D_Constant_Padding/17.897.8990
            OpenCV_Filter2D_Constant_Padding/15.895.89119
            Buddy_Resize2D_Nearest_Neighbour_Interpolation/10.1430.1434,857
            Buddy_Resize2D_Bilinear_Interpolation/10.260.262,690
            OpenCV_Resize2D_Nearest_Neighbour_Interpolation/10.006650.00665105,068
            OpenCV_Resize2D_Bilinear_Interpolation/10.01420.014249,449
            Buddy_Erosion2D_Constant_Padding/10.2140.2143,244
            Buddy_Dilation2D_Constant_Padding/10.2250.2253,243
            Buddy_Opening2D_Constant_Padding/10.3070.3072,260
            Buddy_Closing2D_Constant_Padding/10.3130.3132,223
            Buddy_TopHat2D_Constant_Padding/10.8180.818827
            Buddy_BottomHat2D_Constant_Padding/10.7970.796861
            OpenCV_Erode2D_Constant_Padding/10.1370.1375,101
            OpenCV_Opening2D_Constant_Padding/10.2190.2193,187
            OpenCV_Closing2D_Constant_Padding/10.2220.2223,142
            OpenCV_TopHat2D_Constant_Padding/10.2560.2562,731
            OpenCV_BottomHat2D_Constant_Padding/10.2550.2552,740
            OpenCV_MorphGrad2D_Constant_Padding/10.2490.2492,815
            OpenCV_Dilate2D_Constant_Padding/10.1350.1355,206
            -
            Console output -
            2025-06-01T10:03:20+00:00
            -Running ./bin/image-processing-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.02, 1.38, 2.27
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ---------------------------------------------------------------------------------------------
            -Benchmark                                                  Time             CPU   Iterations
            ---------------------------------------------------------------------------------------------
            -Eigen_Convolve2D/1                                      34.2 ms         34.2 ms           20
            -MLIR_Conv2D/1                                            119 ms          119 ms            6
            -Buddy_Conv2D/1                                          10.5 ms         10.5 ms           67
            -Buddy_Corr2D_Constant_Padding/1                         7.89 ms         7.89 ms           90
            -OpenCV_Filter2D_Constant_Padding/1                      5.89 ms         5.89 ms          119
            -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1       0.143 ms        0.143 ms         4857
            -Buddy_Resize2D_Bilinear_Interpolation/1                0.260 ms        0.260 ms         2690
            -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1      0.007 ms        0.007 ms       105068
            -OpenCV_Resize2D_Bilinear_Interpolation/1               0.014 ms        0.014 ms        49449
            -Buddy_Erosion2D_Constant_Padding/1                     0.214 ms        0.214 ms         3244
            -Buddy_Dilation2D_Constant_Padding/1                    0.225 ms        0.225 ms         3243
            -Buddy_Opening2D_Constant_Padding/1                     0.307 ms        0.307 ms         2260
            -Buddy_Closing2D_Constant_Padding/1                     0.313 ms        0.313 ms         2223
            -Buddy_TopHat2D_Constant_Padding/1                      0.818 ms        0.818 ms          827
            -Buddy_BottomHat2D_Constant_Padding/1                   0.797 ms        0.796 ms          861
            -OpenCV_Erode2D_Constant_Padding/1                      0.137 ms        0.137 ms         5101
            -OpenCV_Opening2D_Constant_Padding/1                    0.219 ms        0.219 ms         3187
            -OpenCV_Closing2D_Constant_Padding/1                    0.222 ms        0.222 ms         3142
            -OpenCV_TopHat2D_Constant_Padding/1                     0.256 ms        0.256 ms         2731
            -OpenCV_BottomHat2D_Constant_Padding/1                  0.255 ms        0.255 ms         2740
            -OpenCV_MorphGrad2D_Constant_Padding/1                  0.249 ms        0.249 ms         2815
            -OpenCV_Dilate2D_Constant_Padding/1                     0.135 ms        0.135 ms         5206
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -Saved PNG file.
            -
            \ No newline at end of file diff --git a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/index.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/index.html deleted file mode 100644 index 25d339ca..00000000 --- a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/index.html +++ /dev/null @@ -1,14 +0,0 @@ ---- -layout: default -title: Benchmark run ---- - -

            Benchmark results

            - -
              -{% for f in site.static_files %} - {% if f.path contains page.dir and f.name != "index.html" and f.extname == ".html" %} -
            • {{ f.name }}
            • - {% endif %} -{% endfor %} -
            diff --git a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/vectorization/vectorization_matrix.html b/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/vectorization/vectorization_matrix.html deleted file mode 100644 index a4c1022e..00000000 --- a/site/benchmarks/2025-07-27/747c01564b9fd3515d28209766d1aae1034159e9/vectorization/vectorization_matrix.html +++ /dev/null @@ -1,40 +0,0 @@ - - - -

            vectorization/vectorization_matrix.json

            2025-07-27 17:54:34 UTC

            -

            vectorization_matrix.json

            - - -
            NameTime (ns)CPU (ns)Iterations
            MLIR_MatMul/118.818.837,302,822
            MLIR_MatVec/120.520.535,030,976
            -
            Console output -
            2025-06-01T10:11:11+00:00
            -Running ./vectorization-matrix-benchmark
            -Run on (24 X 5100 MHz CPU s)
            -CPU Caches:
            -  L1 Data 48 KiB (x12)
            -  L1 Instruction 32 KiB (x12)
            -  L2 Unified 1280 KiB (x12)
            -  L3 Unified 30720 KiB (x1)
            -Load Average: 1.00, 1.09, 1.76
            -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
            ---------------------------------------------------------
            -Benchmark              Time             CPU   Iterations
            ---------------------------------------------------------
            -MLIR_MatMul/1       18.8 ns         18.8 ns     37302822
            -MLIR_MatVec/1       20.5 ns         20.5 ns     35030976
            ---------------------------------------------------------
            -MLIR_MatMul: MLIR MatMul Operation + Nested Loop
            -[ 18 18 18 18 18 18 18 18 18 18 ]
            ---------------------------------------------------------
            -MLIR_MatVec: MLIR MatVec Operation
            -[ 18 18 18 18 18 18 18 18 18 18 ]
            -
            \ No newline at end of file diff --git a/site/benchmarks/latest/index.html b/site/benchmarks/latest/index.html deleted file mode 100644 index 78e7713b..00000000 --- a/site/benchmarks/latest/index.html +++ /dev/null @@ -1 +0,0 @@ - diff --git a/test_result/deeplearning/build_results_crosscompile_summary.log b/test_result/deeplearning/build_results_crosscompile_summary.log index d76da098..e69de29b 100644 --- a/test_result/deeplearning/build_results_crosscompile_summary.log +++ b/test_result/deeplearning/build_results_crosscompile_summary.log @@ -1,29 +0,0 @@ -[Failed] Build of 'dl-model-tinyllama-benchmark' -[Failed] Build of 'dl-model-mobilenetv3-benchmark' -[Success] Build of 'dl-model-lenet-benchmark' -[Failed] Build of 'dl-model-bert-benchmark' -[Failed] Build of 'dl-model-whisper-benchmark' -[Failed] Build of 'dl-model-resnet18-benchmark' -[Success] Build of 'dl-layer-ffn-benchmark' -[Success] Build of 'dl-layer-selfattention-benchmark' -[Success] Build of 'dl-layer-rmsnorm-benchmark' -[Failed] Build of 'dl-op-linalg-matmul-benchmark' -[Success] Build of 'dl-op-linalg-conv2d-nchw-fchw-benchmark' -[Success] Build of 'dl-op-linalg-conv2d-nhwc-hwcf-benchmark' -[Success] Build of 'dl-op-linalg-conv2d-nhwc-fhwc-benchmark' -[Success] Build of 'dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark' -[Success] Build of 'dl-op-linalg-pooling-nhwc-sum-benchmark' -[Failed] Build of 'dl-op-linalg-batch-matmul-benchmark' -[Success] Build of 'dl-op-linalg-arithaddf-benchmark' -[Success] Build of 'dl-op-linalg-arithdivf-benchmark' -[Success] Build of 'dl-op-linalg-arithmulf-benchmark' -[Success] Build of 'dl-op-linalg-arithnegf-benchmark' -[Success] Build of 'dl-op-linalg-arithsubf-benchmark' -[Success] Build of 'dl-op-linalg-mathfpow-benchmark' -[Success] Build of 'dl-op-linalg-mathrsqrt-benchmark' -[Success] Build of 'dl-op-linalg-mathexp-benchmark' -[Success] Build of 'dl-op-linalg-reduceaddf-benchmark' -[Success] Build of 'dl-op-linalg-reducemaxf-benchmark' 
-[Success] Build of 'dl-op-linalg-softmax-exp-sum-div-benchmark' -[Failed] Build of 'dl-op-tosa-transpose-benchmark' -[Failed] Build of 'dl-op-matmul-transpose-b-benchmark' diff --git a/test_result/deeplearning/dl-layer-ffn-benchmark.json b/test_result/deeplearning/dl-layer-ffn-benchmark.json index 2aa81f81..fdea2004 100644 --- a/test_result/deeplearning/dl-layer-ffn-benchmark.json +++ b/test_result/deeplearning/dl-layer-ffn-benchmark.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-07-27T14:26:49+00:00", + "date": "2025-09-07T12:45:30+00:00", "host_name": "4ed4bacfe45d", "executable": "./dl-layer-ffn-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.03564,1.19092,1.30615], + "load_avg": [2.51807,3.40967,5.1626], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 10762, - "real_time": 6.5369959010264392e-02, - "cpu_time": 6.5368221055565889e-02, + "iterations": 10218, + "real_time": 6.7533425334895703e-02, + "cpu_time": 6.7531935701702864e-02, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 25673, - "real_time": 2.7106251964175147e-02, - "cpu_time": 2.7104521248003739e-02, + "iterations": 26193, + "real_time": 2.6626899614870417e-02, + "cpu_time": 2.6626213683045089e-02, "time_unit": "ms" } ] diff --git a/test_result/deeplearning/dl-layer-ffn-benchmark.log b/test_result/deeplearning/dl-layer-ffn-benchmark.log index 8a384bc4..b20bfaa2 100644 --- a/test_result/deeplearning/dl-layer-ffn-benchmark.log +++ b/test_result/deeplearning/dl-layer-ffn-benchmark.log @@ -1,4 +1,4 @@ -2025-07-27T14:26:49+00:00 +2025-09-07T12:45:30+00:00 Running ./dl-layer-ffn-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.04, 1.19, 1.31 +Load Average: 2.52, 3.41, 5.16 
***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. -------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------- -DL_LAYER_FFN/Scalar 0.065 ms 0.065 ms 10762 -DL_LAYER_FFN/Auto_Vectorization 0.027 ms 0.027 ms 25673 +DL_LAYER_FFN/Scalar 0.068 ms 0.068 ms 10218 +DL_LAYER_FFN/Auto_Vectorization 0.027 ms 0.027 ms 26193 ----------------------------------------------------------- Correctness Verification: PASS ----------------------------------------------------------- diff --git a/test_result/deeplearning/dl-layer-rmsnorm-benchmark.json b/test_result/deeplearning/dl-layer-rmsnorm-benchmark.json index e1b8cabe..e7f27984 100644 --- a/test_result/deeplearning/dl-layer-rmsnorm-benchmark.json +++ b/test_result/deeplearning/dl-layer-rmsnorm-benchmark.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-07-27T14:26:53+00:00", + "date": "2025-09-07T12:45:34+00:00", "host_name": "4ed4bacfe45d", "executable": "./dl-layer-rmsnorm-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.03271,1.1875,1.3042], + "load_avg": [2.47656,3.38623,5.14551], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 356202, - "real_time": 1.9603331904748766e-03, - "cpu_time": 1.9603087601978656e-03, + "iterations": 339474, + "real_time": 1.9830409605425532e-03, + "cpu_time": 1.9829382397473739e-03, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 751546, - "real_time": 9.1466103771357563e-04, - "cpu_time": 9.1459137830551969e-04, + "iterations": 780156, + "real_time": 8.9165813354251345e-04, + "cpu_time": 8.9162349196827311e-04, "time_unit": "ms" } ] diff --git a/test_result/deeplearning/dl-layer-rmsnorm-benchmark.log 
b/test_result/deeplearning/dl-layer-rmsnorm-benchmark.log index e0272f58..87b65afc 100644 --- a/test_result/deeplearning/dl-layer-rmsnorm-benchmark.log +++ b/test_result/deeplearning/dl-layer-rmsnorm-benchmark.log @@ -1,4 +1,4 @@ -2025-07-27T14:26:53+00:00 +2025-09-07T12:45:34+00:00 Running ./dl-layer-rmsnorm-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.03, 1.19, 1.30 +Load Average: 2.48, 3.39, 5.15 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. ------------------------------------------------------------------------------ Benchmark Time CPU Iterations ------------------------------------------------------------------------------ -DL_LAYER_RMSNORM/Scalar 0.002 ms 0.002 ms 356202 -DL_LAYER_RMSNORM/Auto_Vectorization 0.001 ms 0.001 ms 751546 +DL_LAYER_RMSNORM/Scalar 0.002 ms 0.002 ms 339474 +DL_LAYER_RMSNORM/Auto_Vectorization 0.001 ms 0.001 ms 780156 ----------------------------------------------------------- Correctness Verification: PASS ----------------------------------------------------------- diff --git a/test_result/deeplearning/dl-layer-selfattention-benchmark.json b/test_result/deeplearning/dl-layer-selfattention-benchmark.json index ad75a13e..f66a0d7e 100644 --- a/test_result/deeplearning/dl-layer-selfattention-benchmark.json +++ b/test_result/deeplearning/dl-layer-selfattention-benchmark.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-07-27T14:26:51+00:00", + "date": "2025-09-07T12:45:32+00:00", "host_name": "4ed4bacfe45d", "executable": "./dl-layer-selfattention-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.03564,1.19092,1.30615], + "load_avg": [2.51807,3.40967,5.1626], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 
149, - "real_time": 4.6930055590283954e+00, - "cpu_time": 4.6929284496644295e+00, + "iterations": 144, + "real_time": 4.8677878868248730e+00, + "cpu_time": 4.8676234444444439e+00, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 446, - "real_time": 1.5730429983660246e+00, - "cpu_time": 1.5730149080717490e+00, + "iterations": 435, + "real_time": 1.5936243722493622e+00, + "cpu_time": 1.5935723448275860e+00, "time_unit": "ms" } ] diff --git a/test_result/deeplearning/dl-layer-selfattention-benchmark.log b/test_result/deeplearning/dl-layer-selfattention-benchmark.log index 05dd5003..1db72b01 100644 --- a/test_result/deeplearning/dl-layer-selfattention-benchmark.log +++ b/test_result/deeplearning/dl-layer-selfattention-benchmark.log @@ -1,4 +1,4 @@ -2025-07-27T14:26:51+00:00 +2025-09-07T12:45:32+00:00 Running ./dl-layer-selfattention-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.04, 1.19, 1.31 +Load Average: 2.52, 3.41, 5.16 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
-------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------- -DL_LAYER_ATTENTION/Scalar 4.69 ms 4.69 ms 149 -DL_LAYER_ATTENTION/Auto_Vectorization 1.57 ms 1.57 ms 446 +DL_LAYER_ATTENTION/Scalar 4.87 ms 4.87 ms 144 +DL_LAYER_ATTENTION/Auto_Vectorization 1.59 ms 1.59 ms 435 ----------------------------------------------------------- Correctness Verification: PASS ----------------------------------------------------------- diff --git a/test_result/deeplearning/dl-model-lenet-benchmark.json b/test_result/deeplearning/dl-model-lenet-benchmark.json index 5b50e363..f50ed8e9 100644 --- a/test_result/deeplearning/dl-model-lenet-benchmark.json +++ b/test_result/deeplearning/dl-model-lenet-benchmark.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-07-27T14:22:52+00:00", + "date": "2025-09-07T12:41:48+00:00", "host_name": "4ed4bacfe45d", "executable": "./dl-model-lenet-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.40137,1.39453,1.396], + "load_avg": [3.67334,4.12793,5.80713], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4304, - "real_time": 1.6466251636775675e-01, - "cpu_time": 1.6464831319702602e-01, + "iterations": 4111, + "real_time": 1.7333792885473193e-01, + "cpu_time": 1.7333462247628315e-01, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5022, - "real_time": 1.3717319905366238e-01, - "cpu_time": 1.3716751294305060e-01, + "iterations": 4846, + "real_time": 1.4355380335623599e-01, + "cpu_time": 1.4355146595130003e-01, "time_unit": "ms" } ] diff --git a/test_result/deeplearning/dl-model-lenet-benchmark.log b/test_result/deeplearning/dl-model-lenet-benchmark.log index 0debea96..ac6de3f8 100644 --- 
a/test_result/deeplearning/dl-model-lenet-benchmark.log +++ b/test_result/deeplearning/dl-model-lenet-benchmark.log @@ -1,4 +1,4 @@ -2025-07-27T14:22:52+00:00 +2025-09-07T12:41:48+00:00 Running ./dl-model-lenet-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,14 +6,14 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.40, 1.39, 1.40 +Load Average: 3.67, 4.13, 5.81 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. ----------------------------------------------------------------------------- Benchmark Time CPU Iterations ----------------------------------------------------------------------------- -DL_MODEL_LENET/Auto_Vectorization 0.165 ms 0.165 ms 4304 -DL_MODEL_LENET/Buddy_Vectorization 0.137 ms 0.137 ms 5022 +DL_MODEL_LENET/Auto_Vectorization 0.173 ms 0.173 ms 4111 +DL_MODEL_LENET/Buddy_Vectorization 0.144 ms 0.144 ms 4846 ----------------------------------------------------------- Correctness Verification: -Transform case: FAIL +Transform case: PASS ----------------------------------------------------------- diff --git a/test_result/deeplearning/dl-model-mobilenetv3-benchmark.json b/test_result/deeplearning/dl-model-mobilenetv3-benchmark.json index 7aa54d99..dd135dd9 100644 --- a/test_result/deeplearning/dl-model-mobilenetv3-benchmark.json +++ b/test_result/deeplearning/dl-model-mobilenetv3-benchmark.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-07-27T14:22:49+00:00", + "date": "2025-09-07T12:41:45+00:00", "host_name": "4ed4bacfe45d", "executable": "./dl-model-mobilenetv3-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.40137,1.39453,1.396], + "load_avg": [3.67334,4.12793,5.80713], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 19, - "real_time": 3.7132115740525094e+01, - "cpu_time": 
3.7130740473684206e+01, + "iterations": 17, + "real_time": 3.9183362222769681e+01, + "cpu_time": 3.9182252941176472e+01, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 21, - "real_time": 3.2978398547995660e+01, - "cpu_time": 3.2976469809523813e+01, + "iterations": 20, + "real_time": 3.4668323397636414e+01, + "cpu_time": 3.4667267849999995e+01, "time_unit": "ms" } ] diff --git a/test_result/deeplearning/dl-model-mobilenetv3-benchmark.log b/test_result/deeplearning/dl-model-mobilenetv3-benchmark.log index 1a74d4d6..9a53be36 100644 --- a/test_result/deeplearning/dl-model-mobilenetv3-benchmark.log +++ b/test_result/deeplearning/dl-model-mobilenetv3-benchmark.log @@ -1,4 +1,4 @@ -2025-07-27T14:22:49+00:00 +2025-09-07T12:41:45+00:00 Running ./dl-model-mobilenetv3-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.40, 1.39, 1.40 +Load Average: 3.67, 4.13, 5.81 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
----------------------------------------------------------------------------------- Benchmark Time CPU Iterations ----------------------------------------------------------------------------------- -BM_MobileNet_V3/BM_MobileNet_V3_scalar 37.1 ms 37.1 ms 19 -BM_MobileNet_V3/BM_MobileNet_V3_conv_opt 33.0 ms 33.0 ms 21 +BM_MobileNet_V3/BM_MobileNet_V3_scalar 39.2 ms 39.2 ms 17 +BM_MobileNet_V3/BM_MobileNet_V3_conv_opt 34.7 ms 34.7 ms 20 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/test_result/deeplearning/dl-model-resnet18-benchmark.json b/test_result/deeplearning/dl-model-resnet18-benchmark.json index 95a6eefe..1628c1ef 100644 --- a/test_result/deeplearning/dl-model-resnet18-benchmark.json +++ b/test_result/deeplearning/dl-model-resnet18-benchmark.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-07-27T14:26:46+00:00", + "date": "2025-09-07T12:45:27+00:00", "host_name": "4ed4bacfe45d", "executable": "./dl-model-resnet18-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.03906,1.19434,1.30811], + "load_avg": [2.47607,3.41699,5.17432], "library_build_type": "release" }, "benchmarks": [ @@ -46,8 +46,8 @@ "repetition_index": 0, "threads": 1, "iterations": 1, - "real_time": 7.3107384704053402e+02, - "cpu_time": 7.2304104800000016e+02, + "real_time": 7.6702358201146126e+02, + "cpu_time": 7.6673241800000005e+02, "time_unit": "ms" }, { @@ -60,8 +60,8 @@ "repetition_index": 0, "threads": 1, "iterations": 1, - "real_time": 7.2872185707092285e+02, - "cpu_time": 7.2154317500000013e+02, + "real_time": 7.7053957059979439e+02, + "cpu_time": 7.7040162699999996e+02, "time_unit": "ms" } ] diff --git a/test_result/deeplearning/dl-model-resnet18-benchmark.log b/test_result/deeplearning/dl-model-resnet18-benchmark.log index d445e776..97e62844 100644 --- a/test_result/deeplearning/dl-model-resnet18-benchmark.log +++ b/test_result/deeplearning/dl-model-resnet18-benchmark.log @@ 
-1,4 +1,4 @@ -2025-07-27T14:26:46+00:00 +2025-09-07T12:45:27+00:00 Running ./dl-model-resnet18-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.04, 1.19, 1.31 +Load Average: 2.48, 3.42, 5.17 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. -------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------- -DL_MODEL_Resnet18/Auto_Vectorization 731 ms 723 ms 1 -DL_MODEL_Resnet18/Buddy_Vectorization 729 ms 722 ms 1 +DL_MODEL_Resnet18/Auto_Vectorization 767 ms 767 ms 1 +DL_MODEL_Resnet18/Buddy_Vectorization 771 ms 770 ms 1 ----------------------------------------------------------- Correctness Verification: PASS ----------------------------------------------------------- diff --git a/test_result/deeplearning/dl-model-tinyllama-benchmark.json b/test_result/deeplearning/dl-model-tinyllama-benchmark.json index cd8070f6..3961b66f 100644 --- a/test_result/deeplearning/dl-model-tinyllama-benchmark.json +++ b/test_result/deeplearning/dl-model-tinyllama-benchmark.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-07-27T14:17:33+00:00", + "date": "2025-09-07T12:35:22+00:00", "host_name": "4ed4bacfe45d", "executable": "./dl-model-tinyllama-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.70264,1.92041,1.53662], + "load_avg": [4.896,5.53271,6.99316], "library_build_type": "release" }, "benchmarks": [ @@ -46,8 +46,8 @@ "repetition_index": 0, "threads": 1, "iterations": 1, - "real_time": 1.3918454140797257e+05, - "cpu_time": 1.3917853827299998e+05, + "real_time": 1.7120061315596104e+05, + "cpu_time": 1.7119792047700004e+05, "time_unit": "ms" }, { @@ -60,8 +60,8 @@ "repetition_index": 0, "threads": 1, "iterations": 1, - 
"real_time": 1.0038352340459824e+04, - "cpu_time": 1.0037513700999994e+04, + "real_time": 1.1143549453467131e+04, + "cpu_time": 1.1135273949000009e+04, "time_unit": "ms" }, { @@ -74,8 +74,8 @@ "repetition_index": 0, "threads": 1, "iterations": 1, - "real_time": 7.8359359223395586e+03, - "cpu_time": 7.2006253560000177e+03, + "real_time": 8.3347530625760555e+03, + "cpu_time": 7.7325455960000227e+03, "time_unit": "ms" } ] diff --git a/test_result/deeplearning/dl-model-tinyllama-benchmark.log b/test_result/deeplearning/dl-model-tinyllama-benchmark.log index 7ac00b10..b6f53ed8 100644 --- a/test_result/deeplearning/dl-model-tinyllama-benchmark.log +++ b/test_result/deeplearning/dl-model-tinyllama-benchmark.log @@ -1,4 +1,4 @@ -2025-07-27T14:17:33+00:00 +2025-09-07T12:35:22+00:00 Running ./dl-model-tinyllama-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,14 +6,14 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.70, 1.92, 1.54 +Load Average: 4.90, 5.53, 6.99 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
---------------------------------------------------------------------------- Benchmark Time CPU Iterations ---------------------------------------------------------------------------- -DL_MODEL_TINYLLAMA/scalar 139185 ms 139179 ms 1 -DL_MODEL_TINYLLAMA/matmul_opt 10038 ms 10038 ms 1 -DL_MODEL_TINYLLAMA/matmul_opt_omp 7836 ms 7201 ms 1 +DL_MODEL_TINYLLAMA/scalar 171201 ms 171198 ms 1 +DL_MODEL_TINYLLAMA/matmul_opt 11144 ms 11135 ms 1 +DL_MODEL_TINYLLAMA/matmul_opt_omp 8335 ms 7733 ms 1 ---------- Verification ---------- matmul_opt PASS matmul_opt_omp PASS diff --git a/test_result/deeplearning/dl-model-whisper-benchmark.json b/test_result/deeplearning/dl-model-whisper-benchmark.json index 97d932d5..add9864c 100644 --- a/test_result/deeplearning/dl-model-whisper-benchmark.json +++ b/test_result/deeplearning/dl-model-whisper-benchmark.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-07-27T14:22:54+00:00", + "date": "2025-09-07T12:41:50+00:00", "host_name": "4ed4bacfe45d", "executable": "./dl-model-whisper-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.44971,1.40479,1.39941], + "load_avg": [3.69971,4.12549,5.79736], "library_build_type": "release" }, "benchmarks": [ @@ -46,8 +46,8 @@ "repetition_index": 0, "threads": 1, "iterations": 1, - "real_time": 7.9983285805210471e+04, - "cpu_time": 7.9980347596000007e+04, + "real_time": 8.8294716205447912e+04, + "cpu_time": 8.8293256732999987e+04, "time_unit": "ms" }, { @@ -60,8 +60,8 @@ "repetition_index": 0, "threads": 1, "iterations": 1, - "real_time": 3.6713125728070736e+04, - "cpu_time": 3.6699949372999996e+04, + "real_time": 4.0465919472277164e+04, + "cpu_time": 4.0458067526999999e+04, "time_unit": "ms" } ] diff --git a/test_result/deeplearning/dl-model-whisper-benchmark.log b/test_result/deeplearning/dl-model-whisper-benchmark.log index b34ddef1..8cfcecec 100644 --- a/test_result/deeplearning/dl-model-whisper-benchmark.log +++ 
b/test_result/deeplearning/dl-model-whisper-benchmark.log @@ -1,4 +1,4 @@ -2025-07-27T14:22:54+00:00 +2025-09-07T12:41:50+00:00 Running ./dl-model-whisper-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,14 +6,10 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.45, 1.40, 1.40 +Load Average: 3.70, 4.13, 5.80 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. ------------------------------------------------------------------------------- Benchmark Time CPU Iterations ------------------------------------------------------------------------------- -DL_MODEL_Whisper/Auto_Vectorization 79983 ms 79980 ms 1 -DL_MODEL_Whisper/Buddy_Vectorization 36713 ms 36700 ms 1 ------------------------------------------------------------ -Correctness Verification for Output1: PASS -Correctness Verification for Output2: FAIL ------------------------------------------------------------ +DL_MODEL_Whisper/Auto_Vectorization 88295 ms 88293 ms 1 +DL_MODEL_Whisper/Buddy_Vectorization 40466 ms 40458 ms 1 diff --git a/test_result/deeplearning/dl-op-linalg-arithaddf-benchmark.json b/test_result/deeplearning/dl-op-linalg-arithaddf-benchmark.json index 56be6d39..bc957097 100644 --- a/test_result/deeplearning/dl-op-linalg-arithaddf-benchmark.json +++ b/test_result/deeplearning/dl-op-linalg-arithaddf-benchmark.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-07-27T14:27:23+00:00", + "date": "2025-09-07T12:46:04+00:00", "host_name": "4ed4bacfe45d", "executable": "./dl-op-linalg-arithaddf-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.07178,1.18408,1.29834], + "load_avg": [2.34521,3.26758,5.04932], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 23451, - "real_time": 2.9521707521156536e-02, - "cpu_time": 2.9520921282674511e-02, + 
"iterations": 22527, + "real_time": 3.1060902958688446e-02, + "cpu_time": 3.1059653438096500e-02, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 174931, - "real_time": 4.0048593536238502e-03, - "cpu_time": 4.0046560300918644e-03, + "iterations": 169988, + "real_time": 4.8817289258969946e-03, + "cpu_time": 4.8816731710473685e-03, "time_unit": "ms" } ] diff --git a/test_result/deeplearning/dl-op-linalg-arithaddf-benchmark.log b/test_result/deeplearning/dl-op-linalg-arithaddf-benchmark.log index 55be5634..4d355935 100644 --- a/test_result/deeplearning/dl-op-linalg-arithaddf-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-arithaddf-benchmark.log @@ -1,4 +1,4 @@ -2025-07-27T14:27:23+00:00 +2025-09-07T12:46:04+00:00 Running ./dl-op-linalg-arithaddf-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.07, 1.18, 1.30 +Load Average: 2.35, 3.27, 5.05 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
-------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------- -BM_ADDF_SCALAR 0.030 ms 0.030 ms 23451 -BM_ADDF_AutoVectorization 0.004 ms 0.004 ms 174931 +BM_ADDF_SCALAR 0.031 ms 0.031 ms 22527 +BM_ADDF_AutoVectorization 0.005 ms 0.005 ms 169988 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/test_result/deeplearning/dl-op-linalg-arithdivf-benchmark.json b/test_result/deeplearning/dl-op-linalg-arithdivf-benchmark.json index b0bf62a8..629be93f 100644 --- a/test_result/deeplearning/dl-op-linalg-arithdivf-benchmark.json +++ b/test_result/deeplearning/dl-op-linalg-arithdivf-benchmark.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-07-27T14:27:25+00:00", + "date": "2025-09-07T12:46:07+00:00", "host_name": "4ed4bacfe45d", "executable": "./dl-op-linalg-arithdivf-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.07178,1.18408,1.29834], + "load_avg": [2.34521,3.26758,5.04932], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 23358, - "real_time": 2.9819811858515573e-02, - "cpu_time": 2.9819110540285985e-02, + "iterations": 22003, + "real_time": 3.2068282908939941e-02, + "cpu_time": 3.2067214334408942e-02, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 67517, - "real_time": 9.4892800457678818e-03, - "cpu_time": 9.4890076869529171e-03, + "iterations": 69823, + "real_time": 1.0602428337310130e-02, + "cpu_time": 1.0602179811809862e-02, "time_unit": "ms" } ] diff --git a/test_result/deeplearning/dl-op-linalg-arithdivf-benchmark.log b/test_result/deeplearning/dl-op-linalg-arithdivf-benchmark.log index 14368fa8..2e4e9d89 100644 --- a/test_result/deeplearning/dl-op-linalg-arithdivf-benchmark.log +++ 
b/test_result/deeplearning/dl-op-linalg-arithdivf-benchmark.log @@ -1,4 +1,4 @@ -2025-07-27T14:27:25+00:00 +2025-09-07T12:46:07+00:00 Running ./dl-op-linalg-arithdivf-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.07, 1.18, 1.30 +Load Average: 2.35, 3.27, 5.05 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. -------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------- -BM_DIVF_SCALAR 0.030 ms 0.030 ms 23358 -BM_DIVF_AutoVectorization 0.009 ms 0.009 ms 67517 +BM_DIVF_SCALAR 0.032 ms 0.032 ms 22003 +BM_DIVF_AutoVectorization 0.011 ms 0.011 ms 69823 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/test_result/deeplearning/dl-op-linalg-arithmulf-benchmark.json b/test_result/deeplearning/dl-op-linalg-arithmulf-benchmark.json index 323f84ef..a7052857 100644 --- a/test_result/deeplearning/dl-op-linalg-arithmulf-benchmark.json +++ b/test_result/deeplearning/dl-op-linalg-arithmulf-benchmark.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-07-27T14:27:27+00:00", + "date": "2025-09-07T12:46:09+00:00", "host_name": "4ed4bacfe45d", "executable": "./dl-op-linalg-arithmulf-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.07178,1.18408,1.29834], + "load_avg": [2.39795,3.2627,5.03809], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 23441, - "real_time": 2.9818539820246910e-02, - "cpu_time": 2.9818065739516234e-02, + "iterations": 22824, + "real_time": 3.0709744117373212e-02, + "cpu_time": 3.0708664607430772e-02, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, 
"repetition_index": 0, "threads": 1, - "iterations": 175263, - "real_time": 3.9962025305209769e-03, - "cpu_time": 3.9961001922824537e-03, + "iterations": 169993, + "real_time": 4.1166770421966290e-03, + "cpu_time": 4.1165691234344949e-03, "time_unit": "ms" } ] diff --git a/test_result/deeplearning/dl-op-linalg-arithmulf-benchmark.log b/test_result/deeplearning/dl-op-linalg-arithmulf-benchmark.log index 54426146..a6adac15 100644 --- a/test_result/deeplearning/dl-op-linalg-arithmulf-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-arithmulf-benchmark.log @@ -1,4 +1,4 @@ -2025-07-27T14:27:27+00:00 +2025-09-07T12:46:09+00:00 Running ./dl-op-linalg-arithmulf-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.07, 1.18, 1.30 +Load Average: 2.40, 3.26, 5.04 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
-------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------- -BM_MULF_SCALAR 0.030 ms 0.030 ms 23441 -BM_MULF_AutoVectorization 0.004 ms 0.004 ms 175263 +BM_MULF_SCALAR 0.031 ms 0.031 ms 22824 +BM_MULF_AutoVectorization 0.004 ms 0.004 ms 169993 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/test_result/deeplearning/dl-op-linalg-arithnegf-benchmark.json b/test_result/deeplearning/dl-op-linalg-arithnegf-benchmark.json index dbc3bdf1..282e1318 100644 --- a/test_result/deeplearning/dl-op-linalg-arithnegf-benchmark.json +++ b/test_result/deeplearning/dl-op-linalg-arithnegf-benchmark.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-07-27T14:27:29+00:00", + "date": "2025-09-07T12:46:11+00:00", "host_name": "4ed4bacfe45d", "executable": "./dl-op-linalg-arithnegf-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.06592,1.18066,1.29639], + "load_avg": [2.39795,3.2627,5.03809], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 30969, - "real_time": 2.2511821252336077e-02, - "cpu_time": 2.2511652910975493e-02, + "iterations": 29588, + "real_time": 2.3588028378715157e-02, + "cpu_time": 2.3587252737596327e-02, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 277205, - "real_time": 2.4580652904906994e-03, - "cpu_time": 2.4580170235024620e-03, + "iterations": 237464, + "real_time": 2.9502898712950253e-03, + "cpu_time": 2.9501475507866456e-03, "time_unit": "ms" } ] diff --git a/test_result/deeplearning/dl-op-linalg-arithnegf-benchmark.log b/test_result/deeplearning/dl-op-linalg-arithnegf-benchmark.log index ba3ca38e..d6e48286 100644 --- a/test_result/deeplearning/dl-op-linalg-arithnegf-benchmark.log +++ 
b/test_result/deeplearning/dl-op-linalg-arithnegf-benchmark.log @@ -1,4 +1,4 @@ -2025-07-27T14:27:29+00:00 +2025-09-07T12:46:11+00:00 Running ./dl-op-linalg-arithnegf-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.07, 1.18, 1.30 +Load Average: 2.40, 3.26, 5.04 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. -------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------- -BM_NEGF_SCALAR 0.023 ms 0.023 ms 30969 -BM_NEGF_AutoVectorization 0.002 ms 0.002 ms 277205 +BM_NEGF_SCALAR 0.024 ms 0.024 ms 29588 +BM_NEGF_AutoVectorization 0.003 ms 0.003 ms 237464 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/test_result/deeplearning/dl-op-linalg-arithsubf-benchmark.json b/test_result/deeplearning/dl-op-linalg-arithsubf-benchmark.json index f6eb7c59..4660fe60 100644 --- a/test_result/deeplearning/dl-op-linalg-arithsubf-benchmark.json +++ b/test_result/deeplearning/dl-op-linalg-arithsubf-benchmark.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-07-27T14:27:31+00:00", + "date": "2025-09-07T12:46:13+00:00", "host_name": "4ed4bacfe45d", "executable": "./dl-op-linalg-arithsubf-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.06592,1.18066,1.29639], + "load_avg": [2.39795,3.2627,5.03809], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 23509, - "real_time": 2.9357210888091442e-02, - "cpu_time": 2.9356123654770513e-02, + "iterations": 22687, + "real_time": 3.0744381588195889e-02, + "cpu_time": 3.0743076916295679e-02, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, 
"repetition_index": 0, "threads": 1, - "iterations": 175223, - "real_time": 3.9904742644157176e-03, - "cpu_time": 3.9903206884940911e-03, + "iterations": 170328, + "real_time": 4.1076257294038214e-03, + "cpu_time": 4.1075212472406180e-03, "time_unit": "ms" } ] diff --git a/test_result/deeplearning/dl-op-linalg-arithsubf-benchmark.log b/test_result/deeplearning/dl-op-linalg-arithsubf-benchmark.log index 0e7bfdce..df9b7125 100644 --- a/test_result/deeplearning/dl-op-linalg-arithsubf-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-arithsubf-benchmark.log @@ -1,4 +1,4 @@ -2025-07-27T14:27:31+00:00 +2025-09-07T12:46:13+00:00 Running ./dl-op-linalg-arithsubf-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.07, 1.18, 1.30 +Load Average: 2.40, 3.26, 5.04 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
-------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------- -BM_SUBF_SCALAR 0.029 ms 0.029 ms 23509 -BM_SUBF_AutoVectorization 0.004 ms 0.004 ms 175223 +BM_SUBF_SCALAR 0.031 ms 0.031 ms 22687 +BM_SUBF_AutoVectorization 0.004 ms 0.004 ms 170328 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/test_result/deeplearning/dl-op-linalg-batch-matmul-benchmark.json b/test_result/deeplearning/dl-op-linalg-batch-matmul-benchmark.json index b404dcfc..a25979f5 100644 --- a/test_result/deeplearning/dl-op-linalg-batch-matmul-benchmark.json +++ b/test_result/deeplearning/dl-op-linalg-batch-matmul-benchmark.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-07-27T14:27:13+00:00", + "date": "2025-09-07T12:45:54+00:00", "host_name": "4ed4bacfe45d", "executable": "./dl-op-linalg-batch-matmul-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.08496,1.19092,1.30225], + "load_avg": [2.40869,3.31104,5.08252], "library_build_type": "release" }, "benchmarks": [ @@ -46,8 +46,8 @@ "repetition_index": 0, "threads": 1, "iterations": 1, - "real_time": 3.5355993732810020e+03, - "cpu_time": 3.5355283939999999e+03, + "real_time": 3.6347736530005932e+03, + "cpu_time": 3.6346553479999998e+03, "time_unit": "ms" }, { @@ -60,8 +60,8 @@ "repetition_index": 0, "threads": 1, "iterations": 1, - "real_time": 9.7580210678279400e+02, - "cpu_time": 9.7576049499999988e+02, + "real_time": 1.0061066299676895e+03, + "cpu_time": 1.0060745660000001e+03, "time_unit": "ms" }, { @@ -74,8 +74,8 @@ "repetition_index": 0, "threads": 1, "iterations": 1, - "real_time": 1.9509307853877544e+02, - "cpu_time": 1.9508785299999954e+02, + "real_time": 1.9591017067432404e+02, + "cpu_time": 1.9590338600000035e+02, "time_unit": "ms" }, { @@ -88,8 +88,8 @@ "repetition_index": 0, "threads": 1, "iterations": 1, - "real_time": 
1.0930293612182140e+02, - "cpu_time": 1.0930234899999914e+02, + "real_time": 1.1179352924227715e+02, + "cpu_time": 1.1179250600000046e+02, "time_unit": "ms" }, { @@ -102,8 +102,8 @@ "repetition_index": 0, "threads": 1, "iterations": 1, - "real_time": 1.1755896359682083e+02, - "cpu_time": 1.1755820300000065e+02, + "real_time": 1.2078800052404404e+02, + "cpu_time": 1.2078363899999988e+02, "time_unit": "ms" }, { @@ -116,8 +116,8 @@ "repetition_index": 0, "threads": 1, "iterations": 1, - "real_time": 3.5555119253695011e+02, - "cpu_time": 3.5554585000000037e+02, + "real_time": 3.6683125793933868e+02, + "cpu_time": 3.6682773099999986e+02, "time_unit": "ms" }, { @@ -130,8 +130,8 @@ "repetition_index": 0, "threads": 1, "iterations": 1, - "real_time": 6.2015011906623840e+01, - "cpu_time": 3.2137073000000349e+01, + "real_time": 1.1074854433536530e+02, + "cpu_time": 2.2687625000000544e+01, "time_unit": "ms" } ] diff --git a/test_result/deeplearning/dl-op-linalg-batch-matmul-benchmark.log b/test_result/deeplearning/dl-op-linalg-batch-matmul-benchmark.log index 30de184d..8d059c82 100644 --- a/test_result/deeplearning/dl-op-linalg-batch-matmul-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-batch-matmul-benchmark.log @@ -1,4 +1,4 @@ -2025-07-27T14:27:13+00:00 +2025-09-07T12:45:54+00:00 Running ./dl-op-linalg-batch-matmul-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,18 +6,18 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.08, 1.19, 1.30 +Load Average: 2.41, 3.31, 5.08 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
--------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations --------------------------------------------------------------------------------------------- -DL_OPS_BATCH_MATMUL/Scalar/iterations:1 3536 ms 3536 ms 1 -DL_OPS_BATCH_MATMUL/AutoVectorization/iterations:1 976 ms 976 ms 1 -DL_OPS_BATCH_MATMUL/Vectorization/iterations:1 195 ms 195 ms 1 -DL_OPS_BATCH_MATMUL/Tile/iterations:1 109 ms 109 ms 1 -DL_OPS_BATCH_MATMUL/SCF/iterations:1 118 ms 118 ms 1 -DL_OPS_BATCH_MATMUL/BROADCAST/iterations:1 356 ms 356 ms 1 -DL_OPS_BATCH_MATMUL/BROADCAST_OMP/iterations:1 62.0 ms 32.1 ms 1 +DL_OPS_BATCH_MATMUL/Scalar/iterations:1 3635 ms 3635 ms 1 +DL_OPS_BATCH_MATMUL/AutoVectorization/iterations:1 1006 ms 1006 ms 1 +DL_OPS_BATCH_MATMUL/Vectorization/iterations:1 196 ms 196 ms 1 +DL_OPS_BATCH_MATMUL/Tile/iterations:1 112 ms 112 ms 1 +DL_OPS_BATCH_MATMUL/SCF/iterations:1 121 ms 121 ms 1 +DL_OPS_BATCH_MATMUL/BROADCAST/iterations:1 367 ms 367 ms 1 +DL_OPS_BATCH_MATMUL/BROADCAST_OMP/iterations:1 111 ms 22.7 ms 1 ---------- Verification ---------- Tile PASS SCF PASS diff --git a/test_result/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.json b/test_result/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.json index ecbac2fd..3a979642 100644 --- a/test_result/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.json +++ b/test_result/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-07-27T14:27:06+00:00", + "date": "2025-09-07T12:45:47+00:00", "host_name": "4ed4bacfe45d", "executable": "./dl-op-linalg-conv2d-nchw-fchw-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.10107,1.19775,1.30615], + "load_avg": [2.4834,3.35645,5.1167], "library_build_type": "release" }, "benchmarks": [ @@ -46,8 +46,8 @@ "repetition_index": 0, "threads": 1, "iterations": 2, - "real_time": 2.8337553981691599e+02, - "cpu_time": 2.8337256700000000e+02, + 
"real_time": 2.9066542163491249e+02, + "cpu_time": 2.9065969050000001e+02, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 101, - "real_time": 6.8005450660049327e+00, - "cpu_time": 6.8004278415841570e+00, + "iterations": 72, + "real_time": 8.5637474743028488e+00, + "cpu_time": 8.5636718611111107e+00, "time_unit": "ms" } ] diff --git a/test_result/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.log b/test_result/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.log index c1434203..fc332eee 100644 --- a/test_result/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.log @@ -1,4 +1,4 @@ -2025-07-27T14:27:06+00:00 +2025-09-07T12:45:47+00:00 Running ./dl-op-linalg-conv2d-nchw-fchw-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.10, 1.20, 1.31 +Load Average: 2.48, 3.36, 5.12 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
------------------------------------------------------------------- Benchmark Time CPU Iterations ------------------------------------------------------------------- -BM_Conv2DNchwFchw_SCALAR 283 ms 283 ms 2 -BM_Conv2DNchwFchw_Im2col 6.80 ms 6.80 ms 101 +BM_Conv2DNchwFchw_SCALAR 291 ms 291 ms 2 +BM_Conv2DNchwFchw_Im2col 8.56 ms 8.56 ms 72 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json b/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json index 09676753..595bcecd 100644 --- a/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json +++ b/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-07-27T14:27:10+00:00", + "date": "2025-09-07T12:45:51+00:00", "host_name": "4ed4bacfe45d", "executable": "./dl-op-linalg-conv2d-nhwc-fhwc-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.09277,1.19434,1.3042], + "load_avg": [2.44434,3.3335,5.09961], "library_build_type": "release" }, "benchmarks": [ @@ -46,8 +46,8 @@ "repetition_index": 0, "threads": 1, "iterations": 5, - "real_time": 7.2274770587682724e+01, - "cpu_time": 7.2273260000000008e+01, + "real_time": 7.3888380080461502e+01, + "cpu_time": 7.3885279400000002e+01, "time_unit": "ms" }, { @@ -60,8 +60,8 @@ "repetition_index": 0, "threads": 1, "iterations": 5, - "real_time": 9.3490019440650940e+00, - "cpu_time": 9.3489287999999995e+00, + "real_time": 9.7335599362850189e+00, + "cpu_time": 9.7335111999999988e+00, "time_unit": "ms" }, { @@ -74,8 +74,8 @@ "repetition_index": 0, "threads": 1, "iterations": 5, - "real_time": 1.8200688064098358e+00, - "cpu_time": 1.8200498000000009e+00, + "real_time": 1.8217429518699646e+00, + "cpu_time": 1.8217338000000027e+00, "time_unit": "ms" }, { @@ -88,8 +88,8 @@ "repetition_index": 0, "threads": 1, "iterations": 5, - 
"real_time": 1.8165208399295807e+00, - "cpu_time": 1.8150957999999995e+00, + "real_time": 1.7791815102100372e+00, + "cpu_time": 1.7791528000000056e+00, "time_unit": "ms" } ] diff --git a/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.log b/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.log index 78da0d68..a49002a9 100644 --- a/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.log @@ -1,4 +1,4 @@ -2025-07-27T14:27:10+00:00 +2025-09-07T12:45:51+00:00 Running ./dl-op-linalg-conv2d-nhwc-fhwc-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,15 +6,15 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.09, 1.19, 1.30 +Load Average: 2.44, 3.33, 5.10 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. --------------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations --------------------------------------------------------------------------------------------------- -DL_OPS_CONV_2D_NHWC_FHWC/scalar/iterations:5 72.3 ms 72.3 ms 5 -DL_OPS_CONV_2D_NHWC_FHWC/auto_vectorization/iterations:5 9.35 ms 9.35 ms 5 +DL_OPS_CONV_2D_NHWC_FHWC/scalar/iterations:5 73.9 ms 73.9 ms 5 +DL_OPS_CONV_2D_NHWC_FHWC/auto_vectorization/iterations:5 9.73 ms 9.73 ms 5 DL_OPS_CONV_2D_NHWC_FHWC/vectorization/iterations:5 1.82 ms 1.82 ms 5 -DL_OPS_CONV_2D_NHWC_FHWC/vec_tile/iterations:5 1.82 ms 1.82 ms 5 +DL_OPS_CONV_2D_NHWC_FHWC/vec_tile/iterations:5 1.78 ms 1.78 ms 5 ---------- Verification ---------- auto_vectorization PASS vectorization PASS diff --git a/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json b/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json index 2e56de32..17a679c1 100644 --- 
a/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json +++ b/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-07-27T14:27:08+00:00", + "date": "2025-09-07T12:45:49+00:00", "host_name": "4ed4bacfe45d", "executable": "./dl-op-linalg-conv2d-nhwc-hwcf-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.09277,1.19434,1.3042], + "load_avg": [2.44434,3.3335,5.09961], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 22, - "real_time": 3.2280084253712133e+01, - "cpu_time": 3.2279513863636353e+01, + "iterations": 21, + "real_time": 3.3404812571548277e+01, + "cpu_time": 3.3404357952380956e+01, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 113, - "real_time": 6.1282727339890153e+00, - "cpu_time": 6.1282270707964628e+00, + "iterations": 110, + "real_time": 6.2886948273940524e+00, + "cpu_time": 6.2886236181818180e+00, "time_unit": "ms" } ] diff --git a/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.log b/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.log index 590ee959..ff8e3c98 100644 --- a/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.log @@ -1,4 +1,4 @@ -2025-07-27T14:27:08+00:00 +2025-09-07T12:45:49+00:00 Running ./dl-op-linalg-conv2d-nhwc-hwcf-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.09, 1.19, 1.30 +Load Average: 2.44, 3.33, 5.10 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
--------------------------------------------------------------------------------- Benchmark Time CPU Iterations --------------------------------------------------------------------------------- -BM_CONV_2D_NHWC_HWCF_SCALAR 32.3 ms 32.3 ms 22 -BM_CONV_2D_NHWC_HWCF_AutoVectorization 6.13 ms 6.13 ms 113 +BM_CONV_2D_NHWC_HWCF_SCALAR 33.4 ms 33.4 ms 21 +BM_CONV_2D_NHWC_HWCF_AutoVectorization 6.29 ms 6.29 ms 110 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/test_result/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json b/test_result/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json index 03dc2cf5..b1b91623 100644 --- a/test_result/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json +++ b/test_result/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-07-27T14:27:11+00:00", + "date": "2025-09-07T12:45:52+00:00", "host_name": "4ed4bacfe45d", "executable": "./dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.09277,1.19434,1.3042], + "load_avg": [2.44434,3.3335,5.09961], "library_build_type": "release" }, "benchmarks": [ @@ -46,8 +46,8 @@ "repetition_index": 0, "threads": 1, "iterations": 5, - "real_time": 4.2500682175159454e+00, - "cpu_time": 4.2499596000000004e+00, + "real_time": 4.3137572705745697e+00, + "cpu_time": 4.3121678000000001e+00, "time_unit": "ms" }, { @@ -60,8 +60,8 @@ "repetition_index": 0, "threads": 1, "iterations": 5, - "real_time": 1.7119426280260086e+00, - "cpu_time": 1.7119346000000006e+00, + "real_time": 1.7169959843158722e+00, + "cpu_time": 1.7169760000000007e+00, "time_unit": "ms" }, { @@ -74,8 +74,8 @@ "repetition_index": 0, "threads": 1, "iterations": 5, - "real_time": 1.2489855289459229e-01, - "cpu_time": 1.2490460000000037e-01, + "real_time": 1.2791678309440613e-01, + "cpu_time": 
1.2791580000000025e-01, "time_unit": "ms" } ] diff --git a/test_result/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.log b/test_result/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.log index 89a765c1..82895553 100644 --- a/test_result/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.log @@ -1,4 +1,4 @@ -2025-07-27T14:27:11+00:00 +2025-09-07T12:45:52+00:00 Running ./dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,14 +6,14 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.09, 1.19, 1.30 +Load Average: 2.44, 3.33, 5.10 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. ------------------------------------------------------------------------------------------------------------ Benchmark Time CPU Iterations ------------------------------------------------------------------------------------------------------------ -DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/scalar/iterations:5 4.25 ms 4.25 ms 5 -DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/auto_vectorization/iterations:5 1.71 ms 1.71 ms 5 -DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/vectorization/iterations:5 0.125 ms 0.125 ms 5 +DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/scalar/iterations:5 4.31 ms 4.31 ms 5 +DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/auto_vectorization/iterations:5 1.72 ms 1.72 ms 5 +DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/vectorization/iterations:5 0.128 ms 0.128 ms 5 ---------- Verification ---------- auto_vectorization PASS vectorization PASS diff --git a/test_result/deeplearning/dl-op-linalg-mathexp-benchmark.json b/test_result/deeplearning/dl-op-linalg-mathexp-benchmark.json index a6ccdbf2..1cc5b9de 100644 --- a/test_result/deeplearning/dl-op-linalg-mathexp-benchmark.json +++ b/test_result/deeplearning/dl-op-linalg-mathexp-benchmark.json @@ 
-1,6 +1,6 @@ { "context": { - "date": "2025-07-27T14:27:37+00:00", + "date": "2025-09-07T12:46:19+00:00", "host_name": "4ed4bacfe45d", "executable": "./dl-op-linalg-mathexp-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.06055,1.17725,1.29443], + "load_avg": [2.49072,3.25391,5.01562], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 15225, - "real_time": 4.5636733275133207e-02, - "cpu_time": 4.5634870213464691e-02, + "iterations": 14801, + "real_time": 4.7153966502236092e-02, + "cpu_time": 4.7153170799270318e-02, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 22248, - "real_time": 3.1553120752518572e-02, - "cpu_time": 3.1552361156058965e-02, + "iterations": 21304, + "real_time": 3.2612131513344626e-02, + "cpu_time": 3.2610319517461503e-02, "time_unit": "ms" } ] diff --git a/test_result/deeplearning/dl-op-linalg-mathexp-benchmark.log b/test_result/deeplearning/dl-op-linalg-mathexp-benchmark.log index 808a3eed..0a0532c4 100644 --- a/test_result/deeplearning/dl-op-linalg-mathexp-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-mathexp-benchmark.log @@ -1,4 +1,4 @@ -2025-07-27T14:27:37+00:00 +2025-09-07T12:46:19+00:00 Running ./dl-op-linalg-mathexp-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.06, 1.18, 1.29 +Load Average: 2.49, 3.25, 5.02 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
------------------------------------------------------------------- Benchmark Time CPU Iterations ------------------------------------------------------------------- -BM_EXP_SCALAR 0.046 ms 0.046 ms 15225 -BM_EXP_AutoVectorization 0.032 ms 0.032 ms 22248 +BM_EXP_SCALAR 0.047 ms 0.047 ms 14801 +BM_EXP_AutoVectorization 0.033 ms 0.033 ms 21304 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/test_result/deeplearning/dl-op-linalg-mathfpow-benchmark.json b/test_result/deeplearning/dl-op-linalg-mathfpow-benchmark.json index fd613964..0b5c9ab3 100644 --- a/test_result/deeplearning/dl-op-linalg-mathfpow-benchmark.json +++ b/test_result/deeplearning/dl-op-linalg-mathfpow-benchmark.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-07-27T14:27:33+00:00", + "date": "2025-09-07T12:46:15+00:00", "host_name": "4ed4bacfe45d", "executable": "./dl-op-linalg-mathfpow-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.06055,1.17725,1.29443], + "load_avg": [2.44629,3.2583,5.02686], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 8255, - "real_time": 8.4126440369310274e-02, - "cpu_time": 8.4123528649303461e-02, + "iterations": 8174, + "real_time": 8.5793241880358306e-02, + "cpu_time": 8.5789674944947408e-02, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 12305, - "real_time": 5.6897423940429549e-02, - "cpu_time": 5.6896244047135301e-02, + "iterations": 11919, + "real_time": 5.8559470965724454e-02, + "cpu_time": 5.8556822887826147e-02, "time_unit": "ms" } ] diff --git a/test_result/deeplearning/dl-op-linalg-mathfpow-benchmark.log b/test_result/deeplearning/dl-op-linalg-mathfpow-benchmark.log index 0d519369..e763d9ba 100644 --- a/test_result/deeplearning/dl-op-linalg-mathfpow-benchmark.log +++ 
b/test_result/deeplearning/dl-op-linalg-mathfpow-benchmark.log @@ -1,4 +1,4 @@ -2025-07-27T14:27:33+00:00 +2025-09-07T12:46:15+00:00 Running ./dl-op-linalg-mathfpow-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.06, 1.18, 1.29 +Load Average: 2.45, 3.26, 5.03 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. -------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------- -BM_FPOW_SCALAR 0.084 ms 0.084 ms 8255 -BM_FPOW_AutoVectorization 0.057 ms 0.057 ms 12305 +BM_FPOW_SCALAR 0.086 ms 0.086 ms 8174 +BM_FPOW_AutoVectorization 0.059 ms 0.059 ms 11919 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/test_result/deeplearning/dl-op-linalg-mathrsqrt-benchmark.json b/test_result/deeplearning/dl-op-linalg-mathrsqrt-benchmark.json index 05e4c7cd..e8085b93 100644 --- a/test_result/deeplearning/dl-op-linalg-mathrsqrt-benchmark.json +++ b/test_result/deeplearning/dl-op-linalg-mathrsqrt-benchmark.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-07-27T14:27:35+00:00", + "date": "2025-09-07T12:46:17+00:00", "host_name": "4ed4bacfe45d", "executable": "./dl-op-linalg-mathrsqrt-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.06055,1.17725,1.29443], + "load_avg": [2.44629,3.2583,5.02686], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 9537, - "real_time": 7.2811122116920549e-02, - "cpu_time": 7.2809154975359128e-02, + "iterations": 9351, + "real_time": 7.4849401154169840e-02, + "cpu_time": 7.4846361458667521e-02, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, 
"repetition_index": 0, "threads": 1, - "iterations": 160927, - "real_time": 4.3499197210659290e-03, - "cpu_time": 4.3497833116879093e-03, + "iterations": 155807, + "real_time": 4.4754421888014168e-03, + "cpu_time": 4.4753065651735808e-03, "time_unit": "ms" } ] diff --git a/test_result/deeplearning/dl-op-linalg-mathrsqrt-benchmark.log b/test_result/deeplearning/dl-op-linalg-mathrsqrt-benchmark.log index 479779e0..5079264e 100644 --- a/test_result/deeplearning/dl-op-linalg-mathrsqrt-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-mathrsqrt-benchmark.log @@ -1,4 +1,4 @@ -2025-07-27T14:27:35+00:00 +2025-09-07T12:46:17+00:00 Running ./dl-op-linalg-mathrsqrt-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.06, 1.18, 1.29 +Load Average: 2.45, 3.26, 5.03 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
--------------------------------------------------------------------- Benchmark Time CPU Iterations --------------------------------------------------------------------- -BM_RSQRT_SCALAR 0.073 ms 0.073 ms 9537 -BM_RSQRT_AutoVectorization 0.004 ms 0.004 ms 160927 +BM_RSQRT_SCALAR 0.075 ms 0.075 ms 9351 +BM_RSQRT_AutoVectorization 0.004 ms 0.004 ms 155807 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/test_result/deeplearning/dl-op-linalg-matmul-benchmark.json b/test_result/deeplearning/dl-op-linalg-matmul-benchmark.json index 1a7eb05e..2cc316a3 100644 --- a/test_result/deeplearning/dl-op-linalg-matmul-benchmark.json +++ b/test_result/deeplearning/dl-op-linalg-matmul-benchmark.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-07-27T14:26:55+00:00", + "date": "2025-09-07T12:45:36+00:00", "host_name": "4ed4bacfe45d", "executable": "./dl-op-linalg-matmul-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.03271,1.1875,1.3042], + "load_avg": [2.47656,3.38623,5.14551], "library_build_type": "release" }, "benchmarks": [ @@ -46,8 +46,8 @@ "repetition_index": 0, "threads": 1, "iterations": 1, - "real_time": 3.9328473061323166e+03, - "cpu_time": 3.9326995480000000e+03, + "real_time": 4.0999811291694641e+03, + "cpu_time": 4.0998556719999997e+03, "time_unit": "ms" }, { @@ -60,8 +60,8 @@ "repetition_index": 0, "threads": 1, "iterations": 1, - "real_time": 3.2123229391872883e+03, - "cpu_time": 3.2121668160000004e+03, + "real_time": 3.5827754400670528e+03, + "cpu_time": 3.5826578559999998e+03, "time_unit": "ms" }, { @@ -74,8 +74,8 @@ "repetition_index": 0, "threads": 1, "iterations": 1, - "real_time": 1.1747585423290730e+02, - "cpu_time": 1.1746541299999969e+02, + "real_time": 1.0819802060723305e+02, + "cpu_time": 1.0819740099999997e+02, "time_unit": "ms" }, { @@ -88,8 +88,8 @@ "repetition_index": 0, "threads": 1, "iterations": 1, - "real_time": 5.9880897402763367e+01, - 
"cpu_time": 5.9877095000000047e+01, + "real_time": 6.1437729746103287e+01, + "cpu_time": 6.1437198000000137e+01, "time_unit": "ms" }, { @@ -102,8 +102,8 @@ "repetition_index": 0, "threads": 1, "iterations": 1, - "real_time": 2.1973790600895882e+01, - "cpu_time": 9.1101149999994746e+00, + "real_time": 1.8467400223016739e+01, + "cpu_time": 7.8750589999998510e+00, "time_unit": "ms" } ] diff --git a/test_result/deeplearning/dl-op-linalg-matmul-benchmark.log b/test_result/deeplearning/dl-op-linalg-matmul-benchmark.log index 7d927f43..b46496bd 100644 --- a/test_result/deeplearning/dl-op-linalg-matmul-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-matmul-benchmark.log @@ -1,4 +1,4 @@ -2025-07-27T14:26:55+00:00 +2025-09-07T12:45:36+00:00 Running ./dl-op-linalg-matmul-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,16 +6,16 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.03, 1.19, 1.30 +Load Average: 2.48, 3.39, 5.15 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
------------------------------------------------------------------------------- Benchmark Time CPU Iterations ------------------------------------------------------------------------------- -DL_OPS_MATMUL/scalar_O0/iterations:1 3933 ms 3933 ms 1 -DL_OPS_MATMUL/scalar_O3/iterations:1 3212 ms 3212 ms 1 -DL_OPS_MATMUL/tile/iterations:1 117 ms 117 ms 1 -DL_OPS_MATMUL/vec/iterations:1 59.9 ms 59.9 ms 1 -DL_OPS_MATMUL/vec_omp/iterations:1 22.0 ms 9.11 ms 1 +DL_OPS_MATMUL/scalar_O0/iterations:1 4100 ms 4100 ms 1 +DL_OPS_MATMUL/scalar_O3/iterations:1 3583 ms 3583 ms 1 +DL_OPS_MATMUL/tile/iterations:1 108 ms 108 ms 1 +DL_OPS_MATMUL/vec/iterations:1 61.4 ms 61.4 ms 1 +DL_OPS_MATMUL/vec_omp/iterations:1 18.5 ms 7.88 ms 1 ---------- Verification ---------- tile PASS vec PASS diff --git a/test_result/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.json b/test_result/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.json index 3ad3bd62..e0b2bec9 100644 --- a/test_result/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.json +++ b/test_result/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-07-27T14:27:11+00:00", + "date": "2025-09-07T12:45:52+00:00", "host_name": "4ed4bacfe45d", "executable": "./dl-op-linalg-pooling-nhwc-sum-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.09277,1.19434,1.3042], + "load_avg": [2.44434,3.3335,5.09961], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3002, - "real_time": 2.3286467260475718e-01, - "cpu_time": 2.3285699133910728e-01, + "iterations": 2922, + "real_time": 2.4033439441913615e-01, + "cpu_time": 2.4032710540725533e-01, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 16950, - "real_time": 4.1440313236903302e-02, - "cpu_time": 4.1438350265486736e-02, + "iterations": 16330, + 
"real_time": 4.2958081279586446e-02, + "cpu_time": 4.2957538089406000e-02, "time_unit": "ms" } ] diff --git a/test_result/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.log b/test_result/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.log index abad4aa4..43ece648 100644 --- a/test_result/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.log @@ -1,4 +1,4 @@ -2025-07-27T14:27:11+00:00 +2025-09-07T12:45:52+00:00 Running ./dl-op-linalg-pooling-nhwc-sum-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.09, 1.19, 1.30 +Load Average: 2.44, 3.33, 5.10 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. -------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------- -BM_POOLING_NHWC_SUM_SCALAR 0.233 ms 0.233 ms 3002 -BM_POOLING_NHWC_SUM_AutoVectorization 0.041 ms 0.041 ms 16950 +BM_POOLING_NHWC_SUM_SCALAR 0.240 ms 0.240 ms 2922 +BM_POOLING_NHWC_SUM_AutoVectorization 0.043 ms 0.043 ms 16330 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/test_result/deeplearning/dl-op-linalg-reduceaddf-benchmark.json b/test_result/deeplearning/dl-op-linalg-reduceaddf-benchmark.json index 03a50be3..82f932ac 100644 --- a/test_result/deeplearning/dl-op-linalg-reduceaddf-benchmark.json +++ b/test_result/deeplearning/dl-op-linalg-reduceaddf-benchmark.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-07-27T14:27:39+00:00", + "date": "2025-09-07T12:46:21+00:00", "host_name": "4ed4bacfe45d", "executable": "./dl-op-linalg-reduceaddf-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": 
[1.05566,1.17383,1.29248], + "load_avg": [2.49072,3.25391,5.01562], "library_build_type": "release" }, "benchmarks": [ diff --git a/test_result/deeplearning/dl-op-linalg-reduceaddf-benchmark.log b/test_result/deeplearning/dl-op-linalg-reduceaddf-benchmark.log index 56c1154d..63793e54 100644 --- a/test_result/deeplearning/dl-op-linalg-reduceaddf-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-reduceaddf-benchmark.log @@ -1,4 +1,4 @@ -2025-07-27T14:27:39+00:00 +2025-09-07T12:46:21+00:00 Running ./dl-op-linalg-reduceaddf-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,5 +6,5 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.06, 1.17, 1.29 +Load Average: 2.49, 3.25, 5.02 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. diff --git a/test_result/deeplearning/dl-op-linalg-reducemaxf-benchmark.json b/test_result/deeplearning/dl-op-linalg-reducemaxf-benchmark.json index 433fd2b7..fdb7b390 100644 --- a/test_result/deeplearning/dl-op-linalg-reducemaxf-benchmark.json +++ b/test_result/deeplearning/dl-op-linalg-reducemaxf-benchmark.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-07-27T14:27:39+00:00", + "date": "2025-09-07T12:46:21+00:00", "host_name": "4ed4bacfe45d", "executable": "./dl-op-linalg-reducemaxf-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.05566,1.17383,1.29248], + "load_avg": [2.49072,3.25391,5.01562], "library_build_type": "release" }, "benchmarks": [ diff --git a/test_result/deeplearning/dl-op-linalg-reducemaxf-benchmark.log b/test_result/deeplearning/dl-op-linalg-reducemaxf-benchmark.log index ce068d49..c7f97958 100644 --- a/test_result/deeplearning/dl-op-linalg-reducemaxf-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-reducemaxf-benchmark.log @@ -1,4 +1,4 @@ -2025-07-27T14:27:39+00:00 +2025-09-07T12:46:21+00:00 Running ./dl-op-linalg-reducemaxf-benchmark 
Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,5 +6,5 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.06, 1.17, 1.29 +Load Average: 2.49, 3.25, 5.02 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. diff --git a/test_result/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.json b/test_result/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.json index d66ad13f..d57079ca 100644 --- a/test_result/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.json +++ b/test_result/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-07-27T14:27:39+00:00", + "date": "2025-09-07T12:46:21+00:00", "host_name": "4ed4bacfe45d", "executable": "./dl-op-linalg-softmax-exp-sum-div-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.05566,1.17383,1.29248], + "load_avg": [2.49072,3.25391,5.01562], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 121646, - "real_time": 5.6556810458011850e-03, - "cpu_time": 5.6554908093977606e-03, + "iterations": 120007, + "real_time": 5.8092399238988792e-03, + "cpu_time": 5.8089997500145821e-03, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 181826, - "real_time": 3.8511731632752664e-03, - "cpu_time": 3.8510635992652325e-03, + "iterations": 176914, + "real_time": 3.9636845145346869e-03, + "cpu_time": 3.9634847383474463e-03, "time_unit": "ms" } ] diff --git a/test_result/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.log b/test_result/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.log index b85c19b7..cfa81168 100644 --- a/test_result/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.log +++ 
b/test_result/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.log @@ -1,4 +1,4 @@ -2025-07-27T14:27:39+00:00 +2025-09-07T12:46:21+00:00 Running ./dl-op-linalg-softmax-exp-sum-div-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.06, 1.17, 1.29 +Load Average: 2.49, 3.25, 5.02 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. -------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------- -BM_SOFTMAXEXPSUMDIV_SCALAR 0.006 ms 0.006 ms 121646 -BM_SOFTMAXEXPSUMDIV_AutoVectorization 0.004 ms 0.004 ms 181826 +BM_SOFTMAXEXPSUMDIV_SCALAR 0.006 ms 0.006 ms 120007 +BM_SOFTMAXEXPSUMDIV_AutoVectorization 0.004 ms 0.004 ms 176914 ----------------------------------------------------------- Correctness Verification: Transform case: PASS diff --git a/test_result/deeplearning/dl-op-matmul-transpose-b-benchmark.json b/test_result/deeplearning/dl-op-matmul-transpose-b-benchmark.json index 0240521c..caa25dfa 100644 --- a/test_result/deeplearning/dl-op-matmul-transpose-b-benchmark.json +++ b/test_result/deeplearning/dl-op-matmul-transpose-b-benchmark.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-07-27T14:27:42+00:00", + "date": "2025-09-07T12:46:24+00:00", "host_name": "4ed4bacfe45d", "executable": "./dl-op-matmul-transpose-b-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.05566,1.17383,1.29248], + "load_avg": [4.61377,3.68164,5.14453], "library_build_type": "release" }, "benchmarks": [ @@ -46,8 +46,8 @@ "repetition_index": 0, "threads": 1, "iterations": 5, - "real_time": 1.0511430144309998e+03, - "cpu_time": 1.0495184466000001e+03, + "real_time": 1.0958168849349022e+03, + "cpu_time": 1.0942407130000001e+03, 
"time_unit": "ms" }, { @@ -60,8 +60,8 @@ "repetition_index": 0, "threads": 1, "iterations": 5, - "real_time": 2.7828946411609650e+02, - "cpu_time": 2.7827974260000002e+02, + "real_time": 2.9605090841650963e+02, + "cpu_time": 2.9603718579999986e+02, "time_unit": "ms" }, { @@ -74,8 +74,8 @@ "repetition_index": 0, "threads": 1, "iterations": 5, - "real_time": 3.2301727309823036e+01, - "cpu_time": 2.2431361399999972e+01, + "real_time": 3.6250606924295425e+01, + "cpu_time": 2.4062124000000118e+01, "time_unit": "ms" }, { @@ -88,8 +88,8 @@ "repetition_index": 0, "threads": 1, "iterations": 5, - "real_time": 8.5547825321555138e+01, - "cpu_time": 8.5541207400000019e+01, + "real_time": 9.5354539155960083e+01, + "cpu_time": 9.5345416999999870e+01, "time_unit": "ms" } ] diff --git a/test_result/deeplearning/dl-op-matmul-transpose-b-benchmark.log b/test_result/deeplearning/dl-op-matmul-transpose-b-benchmark.log index e20623f1..98b32f04 100644 --- a/test_result/deeplearning/dl-op-matmul-transpose-b-benchmark.log +++ b/test_result/deeplearning/dl-op-matmul-transpose-b-benchmark.log @@ -1,4 +1,4 @@ -2025-07-27T14:27:42+00:00 +2025-09-07T12:46:24+00:00 Running ./dl-op-matmul-transpose-b-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,15 +6,15 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.06, 1.17, 1.29 +Load Average: 4.61, 3.68, 5.14 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
----------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations ----------------------------------------------------------------------------------------------- -DL_OPS_MATMUL_TRANSPOSE_B/scalar_O0/iterations:5 1051 ms 1050 ms 5 -DL_OPS_MATMUL_TRANSPOSE_B/scalar_O3/iterations:5 278 ms 278 ms 5 -DL_OPS_MATMUL_TRANSPOSE_B/scalar_O3_omp/iterations:5 32.3 ms 22.4 ms 5 -DL_OPS_MATMUL_TRANSPOSE_B/vec/iterations:5 85.5 ms 85.5 ms 5 +DL_OPS_MATMUL_TRANSPOSE_B/scalar_O0/iterations:5 1096 ms 1094 ms 5 +DL_OPS_MATMUL_TRANSPOSE_B/scalar_O3/iterations:5 296 ms 296 ms 5 +DL_OPS_MATMUL_TRANSPOSE_B/scalar_O3_omp/iterations:5 36.3 ms 24.1 ms 5 +DL_OPS_MATMUL_TRANSPOSE_B/vec/iterations:5 95.4 ms 95.3 ms 5 ---------- Verification ---------- scalar_O3 PASS scalar_O3_omp PASS diff --git a/test_result/deeplearning/dl-op-tosa-transpose-benchmark.json b/test_result/deeplearning/dl-op-tosa-transpose-benchmark.json index fef15b7e..829e775c 100644 --- a/test_result/deeplearning/dl-op-tosa-transpose-benchmark.json +++ b/test_result/deeplearning/dl-op-tosa-transpose-benchmark.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-07-27T14:27:41+00:00", + "date": "2025-09-07T12:46:23+00:00", "host_name": "4ed4bacfe45d", "executable": "./dl-op-tosa-transpose-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.05566,1.17383,1.29248], + "load_avg": [2.49072,3.25391,5.01562], "library_build_type": "release" }, "benchmarks": [ @@ -46,8 +46,8 @@ "repetition_index": 0, "threads": 1, "iterations": 5, - "real_time": 2.6390058174729347e+01, - "cpu_time": 2.1415277600000003e+01, + "real_time": 4.1188374906778336e+01, + "cpu_time": 3.0001973999999997e+01, "time_unit": "ms" }, { @@ -60,8 +60,8 @@ "repetition_index": 0, "threads": 1, "iterations": 5, - "real_time": 1.8864421173930168e+01, - "cpu_time": 1.3372037199999998e+01, + "real_time": 2.9296264052391052e+01, + "cpu_time": 2.4695980400000003e+01, "time_unit": 
"ms" } ] diff --git a/test_result/deeplearning/dl-op-tosa-transpose-benchmark.log b/test_result/deeplearning/dl-op-tosa-transpose-benchmark.log index c9f95ebd..4b119245 100644 --- a/test_result/deeplearning/dl-op-tosa-transpose-benchmark.log +++ b/test_result/deeplearning/dl-op-tosa-transpose-benchmark.log @@ -1,4 +1,4 @@ -2025-07-27T14:27:41+00:00 +2025-09-07T12:46:23+00:00 Running ./dl-op-tosa-transpose-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,12 +6,12 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.06, 1.17, 1.29 +Load Average: 2.49, 3.25, 5.02 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. ------------------------------------------------------------------------------------- Benchmark Time CPU Iterations ------------------------------------------------------------------------------------- -DL_OPS_TRANSPOSE_2D/scalar_O0/iterations:5 26.4 ms 21.4 ms 5 -DL_OPS_TRANSPOSE_2D/scalar_O3/iterations:5 18.9 ms 13.4 ms 5 +DL_OPS_TRANSPOSE_2D/scalar_O0/iterations:5 41.2 ms 30.0 ms 5 +DL_OPS_TRANSPOSE_2D/scalar_O3/iterations:5 29.3 ms 24.7 ms 5 ---------- Verification ---------- scalar_O3 PASS diff --git a/test_result/deeplearning/run_results_summary.log b/test_result/deeplearning/run_results_summary.log index 47cf8e48..b6ac768a 100644 --- a/test_result/deeplearning/run_results_summary.log +++ b/test_result/deeplearning/run_results_summary.log @@ -8,9 +8,8 @@ ↳ stdout/stderr → dl-model-lenet-benchmark.log ↳ gbench JSON → dl-model-lenet-benchmark.json [Missing] Executable not found for 'dl-model-bert-benchmark' -[Success] Run of 'dl-model-whisper-benchmark' - ↳ stdout/stderr → dl-model-whisper-benchmark.log - ↳ gbench JSON → dl-model-whisper-benchmark.json +[Failed] Run of 'dl-model-whisper-benchmark' + ↳ stdout/stderr → dl-model-whisper-benchmark.log (may contain errors) [Success] Run of 'dl-model-resnet18-benchmark' ↳ 
stdout/stderr → dl-model-resnet18-benchmark.log ↳ gbench JSON → dl-model-resnet18-benchmark.json From f48c925f58bb12d229c06fedfbb64ad420d9ecd4 Mon Sep 17 00:00:00 2001 From: Q Liu <52538137+LIUQyou@users.noreply.github.com> Date: Sun, 7 Sep 2025 15:52:27 +0200 Subject: [PATCH 42/52] ci(bench): build top-level benchmarks index, hide per-run pages from nav, and fix links --- .github/workflows/bench.yml | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml index 5cf738ba..2d87e35e 100755 --- a/.github/workflows/bench.yml +++ b/.github/workflows/bench.yml @@ -101,6 +101,7 @@ jobs: --- layout: default title: Benchmark run + nav_exclude: true ---

            Benchmark results

            @@ -115,6 +116,37 @@ jobs: EOF + - name: Build top-level benchmarks index (list all runs) + working-directory: /home/quliu/buddy-complier-workspace/buddy-benchmark/site + run: | + set -e + out=benchmarks/index.html + mkdir -p benchmarks + { + cat <<'HTML' + --- + layout: default + title: Benchmarks + --- +

            Benchmark runs

            +

            Select a date and commit:

            + HTML + + # List dates newest first + for d in $(ls -1d benchmarks/20*/ | sort -r); do + d=${d%/} + echo "

            ${d#benchmarks/}

            " + echo "
              " + # List shas newest first if timestamps exist; otherwise lexicographic + for sha in $(ls -1d "$d"/*/ 2>/dev/null | sort -r); do + sha=${sha%/} + rel=${sha#benchmarks/} + echo "
            • ${rel}
            • " + done + echo "
            " + done + } > "$out" + - name: Push benchmark results uses: peaceiris/actions-gh-pages@v4 with: From b844343a8ee04d0b94143933aebb9f7f1acc1965 Mon Sep 17 00:00:00 2001 From: Q Liu <52538137+LIUQyou@users.noreply.github.com> Date: Sun, 7 Sep 2025 15:57:59 +0200 Subject: [PATCH 43/52] update --- .../build_results_crosscompile_summary.log | 29 ++++ .../deeplearning/build_results_summary.log | 29 ---- ...dom3x3KernelAlignInt_CONSTANT_PADDING.json | 136 +++++++++--------- ...ndom3x3KernelAlignInt_CONSTANT_PADDING.log | 48 +++---- ...om3x3KernelAlignInt_REPLICATE_PADDING.json | 136 +++++++++--------- ...dom3x3KernelAlignInt_REPLICATE_PADDING.log | 48 +++---- ...dom3x3KernelAlignInt_CONSTANT_PADDING.json | 136 +++++++++--------- ...ndom3x3KernelAlignInt_CONSTANT_PADDING.log | 48 +++---- ...om3x3KernelAlignInt_REPLICATE_PADDING.json | 134 ++++++++--------- ...dom3x3KernelAlignInt_REPLICATE_PADDING.log | 46 +++--- ...dom3x3KernelAlignInt_CONSTANT_PADDING.json | 136 +++++++++--------- ...ndom3x3KernelAlignInt_CONSTANT_PADDING.log | 48 +++---- ...om3x3KernelAlignInt_REPLICATE_PADDING.json | 136 +++++++++--------- ...dom3x3KernelAlignInt_REPLICATE_PADDING.log | 48 +++---- ...dom3x3KernelAlignInt_CONSTANT_PADDING.json | 136 +++++++++--------- ...ndom3x3KernelAlignInt_CONSTANT_PADDING.log | 48 +++---- ...om3x3KernelAlignInt_REPLICATE_PADDING.json | 136 +++++++++--------- ...dom3x3KernelAlignInt_REPLICATE_PADDING.log | 48 +++---- ...dom3x3KernelAlignInt_CONSTANT_PADDING.json | 136 +++++++++--------- ...ndom3x3KernelAlignInt_CONSTANT_PADDING.log | 48 +++---- ...om3x3KernelAlignInt_REPLICATE_PADDING.json | 136 +++++++++--------- ...dom3x3KernelAlignInt_REPLICATE_PADDING.log | 48 +++---- ...dom3x3KernelAlignInt_CONSTANT_PADDING.json | 134 ++++++++--------- ...ndom3x3KernelAlignInt_CONSTANT_PADDING.log | 48 +++---- ...om3x3KernelAlignInt_REPLICATE_PADDING.json | 134 ++++++++--------- ...dom3x3KernelAlignInt_REPLICATE_PADDING.log | 48 +++---- 
...dom3x3KernelAlignInt_CONSTANT_PADDING.json | 134 ++++++++--------- ...ndom3x3KernelAlignInt_CONSTANT_PADDING.log | 48 +++---- ...om3x3KernelAlignInt_REPLICATE_PADDING.json | 134 ++++++++--------- ...dom3x3KernelAlignInt_REPLICATE_PADDING.log | 48 +++---- ...dom3x3KernelAlignInt_CONSTANT_PADDING.json | 136 +++++++++--------- ...ndom3x3KernelAlignInt_CONSTANT_PADDING.log | 48 +++---- ...om3x3KernelAlignInt_REPLICATE_PADDING.json | 136 +++++++++--------- ...dom3x3KernelAlignInt_REPLICATE_PADDING.log | 48 +++---- ...dom3x3KernelAlignInt_CONSTANT_PADDING.json | 136 +++++++++--------- ...ndom3x3KernelAlignInt_CONSTANT_PADDING.log | 48 +++---- ...om3x3KernelAlignInt_REPLICATE_PADDING.json | 136 +++++++++--------- ...dom3x3KernelAlignInt_REPLICATE_PADDING.log | 48 +++---- ...dom3x3KernelAlignInt_CONSTANT_PADDING.json | 136 +++++++++--------- ...ndom3x3KernelAlignInt_CONSTANT_PADDING.log | 48 +++---- ...om3x3KernelAlignInt_REPLICATE_PADDING.json | 136 +++++++++--------- ...dom3x3KernelAlignInt_REPLICATE_PADDING.log | 48 +++---- ...dom3x3KernelAlignInt_CONSTANT_PADDING.json | 136 +++++++++--------- ...ndom3x3KernelAlignInt_CONSTANT_PADDING.log | 48 +++---- ...om3x3KernelAlignInt_REPLICATE_PADDING.json | 136 +++++++++--------- ...dom3x3KernelAlignInt_REPLICATE_PADDING.log | 48 +++---- ...dom3x3KernelAlignInt_CONSTANT_PADDING.json | 136 +++++++++--------- ...ndom3x3KernelAlignInt_CONSTANT_PADDING.log | 48 +++---- ...om3x3KernelAlignInt_REPLICATE_PADDING.json | 136 +++++++++--------- ...dom3x3KernelAlignInt_REPLICATE_PADDING.log | 48 +++---- ...dom3x3KernelAlignInt_CONSTANT_PADDING.json | 134 ++++++++--------- ...ndom3x3KernelAlignInt_CONSTANT_PADDING.log | 48 +++---- ...om3x3KernelAlignInt_REPLICATE_PADDING.json | 136 +++++++++--------- ...dom3x3KernelAlignInt_REPLICATE_PADDING.log | 48 +++---- ...dom3x3KernelAlignInt_CONSTANT_PADDING.json | 136 +++++++++--------- ...ndom3x3KernelAlignInt_CONSTANT_PADDING.log | 48 +++---- ...om3x3KernelAlignInt_REPLICATE_PADDING.json | 134 
++++++++--------- ...dom3x3KernelAlignInt_REPLICATE_PADDING.log | 48 +++---- .../image-processing-result.log | 2 +- .../vectorization/vectorization_matrix.json | 16 +-- .../vectorization/vectorization_matrix.log | 8 +- .../vectorization/vectorization_result.log | 42 +++--- 62 files changed, 2631 insertions(+), 2631 deletions(-) diff --git a/test_result/deeplearning/build_results_crosscompile_summary.log b/test_result/deeplearning/build_results_crosscompile_summary.log index e69de29b..eaa29439 100644 --- a/test_result/deeplearning/build_results_crosscompile_summary.log +++ b/test_result/deeplearning/build_results_crosscompile_summary.log @@ -0,0 +1,29 @@ +[Failed] Build of 'dl-model-tinyllama-benchmark' +[Failed] Build of 'dl-model-mobilenetv3-benchmark' +[Failed] Build of 'dl-model-lenet-benchmark' +[Failed] Build of 'dl-model-bert-benchmark' +[Failed] Build of 'dl-model-whisper-benchmark' +[Failed] Build of 'dl-model-resnet18-benchmark' +[Failed] Build of 'dl-layer-ffn-benchmark' +[Failed] Build of 'dl-layer-selfattention-benchmark' +[Failed] Build of 'dl-layer-rmsnorm-benchmark' +[Failed] Build of 'dl-op-linalg-matmul-benchmark' +[Failed] Build of 'dl-op-linalg-conv2d-nchw-fchw-benchmark' +[Failed] Build of 'dl-op-linalg-conv2d-nhwc-hwcf-benchmark' +[Failed] Build of 'dl-op-linalg-conv2d-nhwc-fhwc-benchmark' +[Failed] Build of 'dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark' +[Failed] Build of 'dl-op-linalg-pooling-nhwc-sum-benchmark' +[Failed] Build of 'dl-op-linalg-batch-matmul-benchmark' +[Failed] Build of 'dl-op-linalg-arithaddf-benchmark' +[Failed] Build of 'dl-op-linalg-arithdivf-benchmark' +[Failed] Build of 'dl-op-linalg-arithmulf-benchmark' +[Failed] Build of 'dl-op-linalg-arithnegf-benchmark' +[Failed] Build of 'dl-op-linalg-arithsubf-benchmark' +[Failed] Build of 'dl-op-linalg-mathfpow-benchmark' +[Failed] Build of 'dl-op-linalg-mathrsqrt-benchmark' +[Failed] Build of 'dl-op-linalg-mathexp-benchmark' +[Failed] Build of 
'dl-op-linalg-reduceaddf-benchmark' +[Failed] Build of 'dl-op-linalg-reducemaxf-benchmark' +[Failed] Build of 'dl-op-linalg-softmax-exp-sum-div-benchmark' +[Failed] Build of 'dl-op-tosa-transpose-benchmark' +[Failed] Build of 'dl-op-matmul-transpose-b-benchmark' diff --git a/test_result/deeplearning/build_results_summary.log b/test_result/deeplearning/build_results_summary.log index de1252ba..e69de29b 100644 --- a/test_result/deeplearning/build_results_summary.log +++ b/test_result/deeplearning/build_results_summary.log @@ -1,29 +0,0 @@ -[Success] Build of 'dl-model-tinyllama-benchmark' -[Success] Build of 'dl-model-mobilenetv3-benchmark' -[Success] Build of 'dl-model-lenet-benchmark' -[Failed] Build of 'dl-model-bert-benchmark' -[Success] Build of 'dl-model-whisper-benchmark' -[Success] Build of 'dl-model-resnet18-benchmark' -[Success] Build of 'dl-layer-ffn-benchmark' -[Success] Build of 'dl-layer-selfattention-benchmark' -[Success] Build of 'dl-layer-rmsnorm-benchmark' -[Success] Build of 'dl-op-linalg-matmul-benchmark' -[Success] Build of 'dl-op-linalg-conv2d-nchw-fchw-benchmark' -[Success] Build of 'dl-op-linalg-conv2d-nhwc-hwcf-benchmark' -[Success] Build of 'dl-op-linalg-conv2d-nhwc-fhwc-benchmark' -[Success] Build of 'dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark' -[Success] Build of 'dl-op-linalg-pooling-nhwc-sum-benchmark' -[Success] Build of 'dl-op-linalg-batch-matmul-benchmark' -[Success] Build of 'dl-op-linalg-arithaddf-benchmark' -[Success] Build of 'dl-op-linalg-arithdivf-benchmark' -[Success] Build of 'dl-op-linalg-arithmulf-benchmark' -[Success] Build of 'dl-op-linalg-arithnegf-benchmark' -[Success] Build of 'dl-op-linalg-arithsubf-benchmark' -[Success] Build of 'dl-op-linalg-mathfpow-benchmark' -[Success] Build of 'dl-op-linalg-mathrsqrt-benchmark' -[Success] Build of 'dl-op-linalg-mathexp-benchmark' -[Success] Build of 'dl-op-linalg-reduceaddf-benchmark' -[Success] Build of 'dl-op-linalg-reducemaxf-benchmark' -[Success] Build of 
'dl-op-linalg-softmax-exp-sum-div-benchmark' -[Success] Build of 'dl-op-tosa-transpose-benchmark' -[Success] Build of 'dl-op-matmul-transpose-b-benchmark' diff --git a/test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json index a6898082..7e3224f1 100644 --- a/test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json +++ b/test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-06-01T10:09:28+00:00", + "date": "2025-09-07T13:12:49+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.00244,1.12988,1.85352], + "load_avg": [3.02686,3.38477,5.12598], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 143, - "real_time": 4.8773541030558674e+00, - "cpu_time": 4.8772445034965024e+00, + "iterations": 137, + "real_time": 5.0879708715598948e+00, + "cpu_time": 5.0878425036496351e+00, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 97, - "real_time": 7.1875292462171965e+00, - "cpu_time": 7.1872436701030908e+00, + "iterations": 93, + "real_time": 7.6264414575792125e+00, + "cpu_time": 7.6260674086021520e+00, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1675, - "real_time": 4.1845967520528765e-01, - "cpu_time": 4.1844541313432831e-01, + "iterations": 1666, + "real_time": 4.2049797410581435e-01, + "cpu_time": 4.2046406542617032e-01, "time_unit": "ms" }, { @@ -87,9 +87,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 666, - "real_time": 
1.0563786541511704e+00, - "cpu_time": 1.0563535375375375e+00, + "iterations": 638, + "real_time": 1.1059913177008165e+00, + "cpu_time": 1.1059258667711596e+00, "time_unit": "ms" }, { @@ -101,9 +101,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 376, - "real_time": 1.8610506060909717e+00, - "cpu_time": 1.8609820930851073e+00, + "iterations": 361, + "real_time": 1.9581975348273142e+00, + "cpu_time": 1.9581129722991706e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4817, - "real_time": 1.4254830093992085e-01, - "cpu_time": 1.4254228129541210e-01, + "iterations": 4637, + "real_time": 1.5017545063202048e-01, + "cpu_time": 1.5017112659046808e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2689, - "real_time": 2.5988410633263775e-01, - "cpu_time": 2.5987645704722923e-01, + "iterations": 2576, + "real_time": 2.7208778318827564e-01, + "cpu_time": 2.7207776824534169e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 105253, - "real_time": 6.6504519812804995e-03, - "cpu_time": 6.6503598092215932e-03, + "iterations": 102256, + "real_time": 7.4423461665179763e-03, + "cpu_time": 7.4419053551869818e-03, "time_unit": "ms" }, { @@ -157,9 +157,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 49956, - "real_time": 1.4011868008661712e-02, - "cpu_time": 1.4011432080230613e-02, + "iterations": 48431, + "real_time": 1.4448959287854052e-02, + "cpu_time": 1.4448407239164986e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3274, - "real_time": 2.1450668653028362e-01, - "cpu_time": 2.1450349694563184e-01, + "iterations": 2937, + "real_time": 2.5159487000403047e-01, + "cpu_time": 2.5157496322778355e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 
1, "repetition_index": 0, "threads": 1, - "iterations": 3255, - "real_time": 2.1387947674628960e-01, - "cpu_time": 2.1387067096774173e-01, + "iterations": 2951, + "real_time": 2.5003220001062026e-01, + "cpu_time": 2.5001731955269441e-01, "time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2261, - "real_time": 3.1398964921413719e-01, - "cpu_time": 3.1398281954887258e-01, + "iterations": 1844, + "real_time": 4.3209650656794263e-01, + "cpu_time": 4.3208606724511922e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2259, - "real_time": 3.1822234319606257e-01, - "cpu_time": 3.1820692784417859e-01, + "iterations": 1576, + "real_time": 4.0304744237948797e-01, + "cpu_time": 4.0303471763959337e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 835, - "real_time": 8.0970415767438397e-01, - "cpu_time": 8.0966204670658792e-01, + "iterations": 672, + "real_time": 9.8597416875972632e-01, + "cpu_time": 9.8593559821428622e-01, "time_unit": "ms" }, { @@ -241,9 +241,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 848, - "real_time": 7.8669123193424828e-01, - "cpu_time": 7.8665746108490497e-01, + "iterations": 661, + "real_time": 9.8078462905313890e-01, + "cpu_time": 9.8075158850226873e-01, "time_unit": "ms" }, { @@ -255,9 +255,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5119, - "real_time": 1.3659681348356797e-01, - "cpu_time": 1.3659497030670081e-01, + "iterations": 4899, + "real_time": 1.4225564095579282e-01, + "cpu_time": 1.4225357236170624e-01, "time_unit": "ms" }, { @@ -269,9 +269,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3092, - "real_time": 2.2627924897865831e-01, - "cpu_time": 2.2626982018111277e-01, + "iterations": 3062, + "real_time": 2.2066803679483377e-01, + "cpu_time": 
2.2066302024820375e-01, "time_unit": "ms" }, { @@ -283,9 +283,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3082, - "real_time": 2.2718680044666348e-01, - "cpu_time": 2.2718060447761135e-01, + "iterations": 3062, + "real_time": 2.2651180065316676e-01, + "cpu_time": 2.2650612083605487e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2689, - "real_time": 2.6025091304505082e-01, - "cpu_time": 2.6024617776124936e-01, + "iterations": 2652, + "real_time": 2.6190968519736918e-01, + "cpu_time": 2.6190097134238283e-01, "time_unit": "ms" }, { @@ -311,9 +311,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2705, - "real_time": 2.5885344212685407e-01, - "cpu_time": 2.5885116820702392e-01, + "iterations": 2671, + "real_time": 2.6485771191829782e-01, + "cpu_time": 2.6484863721452689e-01, "time_unit": "ms" }, { @@ -325,9 +325,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2779, - "real_time": 2.5195326185582478e-01, - "cpu_time": 2.5194584382871649e-01, + "iterations": 2662, + "real_time": 2.6326207129199375e-01, + "cpu_time": 2.6325002854996282e-01, "time_unit": "ms" }, { @@ -339,9 +339,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5113, - "real_time": 1.3684895659714744e-01, - "cpu_time": 1.3684487287306843e-01, + "iterations": 4870, + "real_time": 1.4466622373896212e-01, + "cpu_time": 1.4466395708418861e-01, "time_unit": "ms" } ] diff --git a/test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log b/test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log index 22773f00..3e35d00b 100644 --- a/test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log +++ 
b/test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log @@ -1,4 +1,4 @@ -2025-06-01T10:09:28+00:00 +2025-09-07T13:12:49+00:00 Running ./bin/image-processing-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,33 +6,33 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.13, 1.85 +Load Average: 3.03, 3.38, 5.13 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. -------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------------------- -Eigen_Convolve2D/1 4.88 ms 4.88 ms 143 -MLIR_Conv2D/1 7.19 ms 7.19 ms 97 -Buddy_Conv2D/1 0.418 ms 0.418 ms 1675 -Buddy_Corr2D_Constant_Padding/1 1.06 ms 1.06 ms 666 -OpenCV_Filter2D_Constant_Padding/1 1.86 ms 1.86 ms 376 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.143 ms 0.143 ms 4817 -Buddy_Resize2D_Bilinear_Interpolation/1 0.260 ms 0.260 ms 2689 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 105253 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 49956 -Buddy_Erosion2D_Constant_Padding/1 0.215 ms 0.215 ms 3274 -Buddy_Dilation2D_Constant_Padding/1 0.214 ms 0.214 ms 3255 -Buddy_Opening2D_Constant_Padding/1 0.314 ms 0.314 ms 2261 -Buddy_Closing2D_Constant_Padding/1 0.318 ms 0.318 ms 2259 -Buddy_TopHat2D_Constant_Padding/1 0.810 ms 0.810 ms 835 -Buddy_BottomHat2D_Constant_Padding/1 0.787 ms 0.787 ms 848 -OpenCV_Erode2D_Constant_Padding/1 0.137 ms 0.137 ms 5119 -OpenCV_Opening2D_Constant_Padding/1 0.226 ms 0.226 ms 3092 -OpenCV_Closing2D_Constant_Padding/1 0.227 ms 0.227 ms 3082 -OpenCV_TopHat2D_Constant_Padding/1 0.260 ms 0.260 ms 2689 -OpenCV_BottomHat2D_Constant_Padding/1 0.259 ms 0.259 ms 2705 -OpenCV_MorphGrad2D_Constant_Padding/1 0.252 ms 0.252 ms 2779 
-OpenCV_Dilate2D_Constant_Padding/1 0.137 ms 0.137 ms 5113 +Eigen_Convolve2D/1 5.09 ms 5.09 ms 137 +MLIR_Conv2D/1 7.63 ms 7.63 ms 93 +Buddy_Conv2D/1 0.420 ms 0.420 ms 1666 +Buddy_Corr2D_Constant_Padding/1 1.11 ms 1.11 ms 638 +OpenCV_Filter2D_Constant_Padding/1 1.96 ms 1.96 ms 361 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.150 ms 0.150 ms 4637 +Buddy_Resize2D_Bilinear_Interpolation/1 0.272 ms 0.272 ms 2576 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 102256 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 48431 +Buddy_Erosion2D_Constant_Padding/1 0.252 ms 0.252 ms 2937 +Buddy_Dilation2D_Constant_Padding/1 0.250 ms 0.250 ms 2951 +Buddy_Opening2D_Constant_Padding/1 0.432 ms 0.432 ms 1844 +Buddy_Closing2D_Constant_Padding/1 0.403 ms 0.403 ms 1576 +Buddy_TopHat2D_Constant_Padding/1 0.986 ms 0.986 ms 672 +Buddy_BottomHat2D_Constant_Padding/1 0.981 ms 0.981 ms 661 +OpenCV_Erode2D_Constant_Padding/1 0.142 ms 0.142 ms 4899 +OpenCV_Opening2D_Constant_Padding/1 0.221 ms 0.221 ms 3062 +OpenCV_Closing2D_Constant_Padding/1 0.227 ms 0.227 ms 3062 +OpenCV_TopHat2D_Constant_Padding/1 0.262 ms 0.262 ms 2652 +OpenCV_BottomHat2D_Constant_Padding/1 0.265 ms 0.265 ms 2671 +OpenCV_MorphGrad2D_Constant_Padding/1 0.263 ms 0.263 ms 2662 +OpenCV_Dilate2D_Constant_Padding/1 0.145 ms 0.145 ms 4870 Saved PNG file. Saved PNG file. Saved PNG file. 
diff --git a/test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json index f6dd1e3f..50bf9daa 100644 --- a/test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json +++ b/test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-06-01T10:09:52+00:00", + "date": "2025-09-07T13:13:14+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.00049,1.12012,1.83398], + "load_avg": [2.80664,3.30371,5.05225], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 143, - "real_time": 4.8817551037023117e+00, - "cpu_time": 4.8816076643356636e+00, + "iterations": 140, + "real_time": 4.9840724893978665e+00, + "cpu_time": 4.9838604142857141e+00, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 97, - "real_time": 7.1810987806811779e+00, - "cpu_time": 7.1807136907216469e+00, + "iterations": 92, + "real_time": 7.5595184917683182e+00, + "cpu_time": 7.5592466739130435e+00, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1675, - "real_time": 4.1780948194105233e-01, - "cpu_time": 4.1780305313432836e-01, + "iterations": 1608, + "real_time": 4.3011411551886530e-01, + "cpu_time": 4.3009694029850742e-01, "time_unit": "ms" }, { @@ -87,9 +87,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 662, - "real_time": 1.0580040790208156e+00, - "cpu_time": 1.0579419244712991e+00, + "iterations": 630, + "real_time": 1.1155422008226787e+00, + "cpu_time": 
1.1154895079365073e+00, "time_unit": "ms" }, { @@ -101,9 +101,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 376, - "real_time": 1.8630626046673415e+00, - "cpu_time": 1.8629938537234036e+00, + "iterations": 359, + "real_time": 1.9513797481910100e+00, + "cpu_time": 1.9512984373259061e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4857, - "real_time": 1.4277792460748584e-01, - "cpu_time": 1.4277163187152564e-01, + "iterations": 4613, + "real_time": 1.5032416584691746e-01, + "cpu_time": 1.5031801712551474e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2693, - "real_time": 2.6049529473068012e-01, - "cpu_time": 2.6048748124767929e-01, + "iterations": 2556, + "real_time": 2.7321518365299569e-01, + "cpu_time": 2.7320184233176831e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 105155, - "real_time": 6.6589808706085229e-03, - "cpu_time": 6.6587386905044982e-03, + "iterations": 101295, + "real_time": 6.8678814235412153e-03, + "cpu_time": 6.8675380719680160e-03, "time_unit": "ms" }, { @@ -157,9 +157,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 49833, - "real_time": 1.4114310853593676e-02, - "cpu_time": 1.4113868821865060e-02, + "iterations": 48397, + "real_time": 1.4453964790490097e-02, + "cpu_time": 1.4453691468479458e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3267, - "real_time": 2.1469332323385956e-01, - "cpu_time": 2.1467965840220340e-01, + "iterations": 2868, + "real_time": 2.4664656855321829e-01, + "cpu_time": 2.4663631938633196e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3262, - "real_time": 2.1372071583255678e-01, - "cpu_time": 
2.1371441998773794e-01, + "iterations": 2836, + "real_time": 2.4140081453768927e-01, + "cpu_time": 2.4138928561354012e-01, "time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2259, - "real_time": 3.0885873203284975e-01, - "cpu_time": 3.0883940903054496e-01, + "iterations": 1792, + "real_time": 3.8493265414477457e-01, + "cpu_time": 3.8491138560267846e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2232, - "real_time": 3.1060778776243808e-01, - "cpu_time": 3.1059406586021510e-01, + "iterations": 1804, + "real_time": 3.9144566162858990e-01, + "cpu_time": 3.9143657760532191e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 854, - "real_time": 8.0134714157505949e-01, - "cpu_time": 8.0130978220140392e-01, + "iterations": 706, + "real_time": 1.0073629099738497e+00, + "cpu_time": 1.0073418994334273e+00, "time_unit": "ms" }, { @@ -241,9 +241,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 833, - "real_time": 7.9537247892330531e-01, - "cpu_time": 7.9534494117647103e-01, + "iterations": 664, + "real_time": 1.0099171279334878e+00, + "cpu_time": 1.0098988192771083e+00, "time_unit": "ms" }, { @@ -255,9 +255,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5118, - "real_time": 1.3649488219517478e-01, - "cpu_time": 1.3649151563110587e-01, + "iterations": 3883, + "real_time": 1.5074242133778640e-01, + "cpu_time": 1.5073515297450438e-01, "time_unit": "ms" }, { @@ -269,9 +269,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3117, - "real_time": 2.2415243349742309e-01, - "cpu_time": 2.2414577446262374e-01, + "iterations": 3136, + "real_time": 2.1938790391408364e-01, + "cpu_time": 2.1937540688775489e-01, "time_unit": "ms" }, { @@ -283,9 +283,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - 
"iterations": 3087, - "real_time": 2.2616963212085381e-01, - "cpu_time": 2.2616179786200155e-01, + "iterations": 3187, + "real_time": 2.2193600938243105e-01, + "cpu_time": 2.2192187480388995e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2731, - "real_time": 2.5616902332849795e-01, - "cpu_time": 2.5615968912486209e-01, + "iterations": 2728, + "real_time": 2.6235867925316009e-01, + "cpu_time": 2.6234814479472190e-01, "time_unit": "ms" }, { @@ -311,9 +311,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2693, - "real_time": 2.5978066425561641e-01, - "cpu_time": 2.5977364017823990e-01, + "iterations": 2717, + "real_time": 2.6013763031189086e-01, + "cpu_time": 2.6012419690835509e-01, "time_unit": "ms" }, { @@ -325,9 +325,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2799, - "real_time": 2.4936800784683943e-01, - "cpu_time": 2.4935978027867164e-01, + "iterations": 2635, + "real_time": 2.6362094130416525e-01, + "cpu_time": 2.6361262201138502e-01, "time_unit": "ms" }, { @@ -339,9 +339,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5051, - "real_time": 1.3865814552557773e-01, - "cpu_time": 1.3865657156998629e-01, + "iterations": 4736, + "real_time": 1.4790402162140487e-01, + "cpu_time": 1.4790140983952701e-01, "time_unit": "ms" } ] diff --git a/test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log b/test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log index 02471cda..9de080dd 100644 --- a/test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log +++ b/test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log @@ -1,4 +1,4 @@ -2025-06-01T10:09:52+00:00 +2025-09-07T13:13:14+00:00 Running ./bin/image-processing-benchmark Run on 
(24 X 5100 MHz CPU s) CPU Caches: @@ -6,33 +6,33 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.12, 1.83 +Load Average: 2.81, 3.30, 5.05 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. -------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------------------- -Eigen_Convolve2D/1 4.88 ms 4.88 ms 143 -MLIR_Conv2D/1 7.18 ms 7.18 ms 97 -Buddy_Conv2D/1 0.418 ms 0.418 ms 1675 -Buddy_Corr2D_Constant_Padding/1 1.06 ms 1.06 ms 662 -OpenCV_Filter2D_Constant_Padding/1 1.86 ms 1.86 ms 376 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.143 ms 0.143 ms 4857 -Buddy_Resize2D_Bilinear_Interpolation/1 0.260 ms 0.260 ms 2693 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 105155 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 49833 -Buddy_Erosion2D_Constant_Padding/1 0.215 ms 0.215 ms 3267 -Buddy_Dilation2D_Constant_Padding/1 0.214 ms 0.214 ms 3262 -Buddy_Opening2D_Constant_Padding/1 0.309 ms 0.309 ms 2259 -Buddy_Closing2D_Constant_Padding/1 0.311 ms 0.311 ms 2232 -Buddy_TopHat2D_Constant_Padding/1 0.801 ms 0.801 ms 854 -Buddy_BottomHat2D_Constant_Padding/1 0.795 ms 0.795 ms 833 -OpenCV_Erode2D_Constant_Padding/1 0.136 ms 0.136 ms 5118 -OpenCV_Opening2D_Constant_Padding/1 0.224 ms 0.224 ms 3117 -OpenCV_Closing2D_Constant_Padding/1 0.226 ms 0.226 ms 3087 -OpenCV_TopHat2D_Constant_Padding/1 0.256 ms 0.256 ms 2731 -OpenCV_BottomHat2D_Constant_Padding/1 0.260 ms 0.260 ms 2693 -OpenCV_MorphGrad2D_Constant_Padding/1 0.249 ms 0.249 ms 2799 -OpenCV_Dilate2D_Constant_Padding/1 0.139 ms 0.139 ms 5051 +Eigen_Convolve2D/1 4.98 ms 4.98 ms 140 +MLIR_Conv2D/1 7.56 ms 7.56 ms 92 +Buddy_Conv2D/1 0.430 ms 0.430 ms 1608 +Buddy_Corr2D_Constant_Padding/1 1.12 ms 1.12 ms 630 
+OpenCV_Filter2D_Constant_Padding/1 1.95 ms 1.95 ms 359 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.150 ms 0.150 ms 4613 +Buddy_Resize2D_Bilinear_Interpolation/1 0.273 ms 0.273 ms 2556 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 101295 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 48397 +Buddy_Erosion2D_Constant_Padding/1 0.247 ms 0.247 ms 2868 +Buddy_Dilation2D_Constant_Padding/1 0.241 ms 0.241 ms 2836 +Buddy_Opening2D_Constant_Padding/1 0.385 ms 0.385 ms 1792 +Buddy_Closing2D_Constant_Padding/1 0.391 ms 0.391 ms 1804 +Buddy_TopHat2D_Constant_Padding/1 1.01 ms 1.01 ms 706 +Buddy_BottomHat2D_Constant_Padding/1 1.01 ms 1.01 ms 664 +OpenCV_Erode2D_Constant_Padding/1 0.151 ms 0.151 ms 3883 +OpenCV_Opening2D_Constant_Padding/1 0.219 ms 0.219 ms 3136 +OpenCV_Closing2D_Constant_Padding/1 0.222 ms 0.222 ms 3187 +OpenCV_TopHat2D_Constant_Padding/1 0.262 ms 0.262 ms 2728 +OpenCV_BottomHat2D_Constant_Padding/1 0.260 ms 0.260 ms 2717 +OpenCV_MorphGrad2D_Constant_Padding/1 0.264 ms 0.264 ms 2635 +OpenCV_Dilate2D_Constant_Padding/1 0.148 ms 0.148 ms 4736 Saved PNG file. Saved PNG file. Saved PNG file. 
diff --git a/test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json index 2d7c7421..448cffd8 100644 --- a/test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json +++ b/test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-06-01T10:10:17+00:00", + "date": "2025-09-07T13:13:38+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1,1.10938,1.81006], + "load_avg": [2.88232,3.27881,4.99609], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 61, - "real_time": 1.1535078989433460e+01, - "cpu_time": 1.1535000672131149e+01, + "iterations": 57, + "real_time": 1.2154607396376761e+01, + "cpu_time": 1.2154355473684211e+01, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 25, - "real_time": 2.9024361819028854e+01, - "cpu_time": 2.9023777840000001e+01, + "iterations": 23, + "real_time": 3.0489446352357451e+01, + "cpu_time": 3.0488431521739127e+01, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 632, - "real_time": 1.1113395242468467e+00, - "cpu_time": 1.1113083670886079e+00, + "iterations": 791, + "real_time": 8.8596336458787306e-01, + "cpu_time": 8.8593867635903911e-01, "time_unit": "ms" }, { @@ -87,9 +87,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 400, - "real_time": 1.7448093881830573e+00, - "cpu_time": 1.7447838250000003e+00, + "iterations": 381, + "real_time": 1.8370149857572371e+00, + "cpu_time": 1.8369234225721787e+00, "time_unit": "ms" }, { @@ 
-101,9 +101,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 262, - "real_time": 2.6770318211376214e+00, - "cpu_time": 2.6768247938931302e+00, + "iterations": 251, + "real_time": 2.8152136926157065e+00, + "cpu_time": 2.8151388007968121e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4855, - "real_time": 1.4248677518844113e-01, - "cpu_time": 1.4247969289392393e-01, + "iterations": 4625, + "real_time": 1.4954823497179393e-01, + "cpu_time": 1.4954442335135137e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2692, - "real_time": 2.6068802961711546e-01, - "cpu_time": 2.6068349999999985e-01, + "iterations": 2572, + "real_time": 2.7326759223248315e-01, + "cpu_time": 2.7326206259720059e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 105416, - "real_time": 6.6310110770308973e-03, - "cpu_time": 6.6309139883888547e-03, + "iterations": 102091, + "real_time": 6.9029587996913182e-03, + "cpu_time": 6.9026970937692798e-03, "time_unit": "ms" }, { @@ -157,9 +157,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 49870, - "real_time": 1.4010478047897181e-02, - "cpu_time": 1.4010075977541595e-02, + "iterations": 48431, + "real_time": 1.4439809400753574e-02, + "cpu_time": 1.4439482418285833e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3258, - "real_time": 2.1404682151502155e-01, - "cpu_time": 2.1404371424186619e-01, + "iterations": 2833, + "real_time": 2.3951560411401068e-01, + "cpu_time": 2.3949907130250636e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3246, - "real_time": 2.1395063944716403e-01, - "cpu_time": 2.1394007516943925e-01, + "iterations": 2959, + "real_time": 
2.4086729588440925e-01, + "cpu_time": 2.4085952145995312e-01, "time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2260, - "real_time": 3.2007832047158635e-01, - "cpu_time": 3.2006619601769948e-01, + "iterations": 1787, + "real_time": 3.8442675443287994e-01, + "cpu_time": 3.8440008337996617e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2223, - "real_time": 3.0985082283086512e-01, - "cpu_time": 3.0983889653621222e-01, + "iterations": 1757, + "real_time": 3.9290034837117571e-01, + "cpu_time": 3.9287781274900396e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 827, - "real_time": 8.0566855987585728e-01, - "cpu_time": 8.0563922007255140e-01, + "iterations": 664, + "real_time": 1.0083697298653873e+00, + "cpu_time": 1.0083280978915672e+00, "time_unit": "ms" }, { @@ -241,9 +241,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 852, - "real_time": 8.2004826489678573e-01, - "cpu_time": 8.2000983802816896e-01, + "iterations": 650, + "real_time": 9.9526015611795282e-01, + "cpu_time": 9.9524376923076896e-01, "time_unit": "ms" }, { @@ -255,9 +255,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5096, - "real_time": 1.3718380646859082e-01, - "cpu_time": 1.3718256004709559e-01, + "iterations": 4833, + "real_time": 1.4464196666362936e-01, + "cpu_time": 1.4463780633147102e-01, "time_unit": "ms" }, { @@ -269,9 +269,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3136, - "real_time": 2.2296642240764078e-01, - "cpu_time": 2.2295898022959179e-01, + "iterations": 3097, + "real_time": 2.2951874991444030e-01, + "cpu_time": 2.2951408104617452e-01, "time_unit": "ms" }, { @@ -283,9 +283,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3085, - "real_time": 2.2682713365825299e-01, - 
"cpu_time": 2.2682429335494333e-01, + "iterations": 3072, + "real_time": 2.2713451107847504e-01, + "cpu_time": 2.2713078320312519e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2693, - "real_time": 2.5972659152168526e-01, - "cpu_time": 2.5971731006312543e-01, + "iterations": 2632, + "real_time": 2.6567278452511978e-01, + "cpu_time": 2.6566629559270510e-01, "time_unit": "ms" }, { @@ -311,9 +311,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2686, - "real_time": 2.6035926575902141e-01, - "cpu_time": 2.6035738272524256e-01, + "iterations": 2644, + "real_time": 2.6454315795570205e-01, + "cpu_time": 2.6453323524962147e-01, "time_unit": "ms" }, { @@ -325,9 +325,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2746, - "real_time": 2.5441637364448810e-01, - "cpu_time": 2.5440569264384544e-01, + "iterations": 2613, + "real_time": 2.6846960489201538e-01, + "cpu_time": 2.6846243245311946e-01, "time_unit": "ms" }, { @@ -339,9 +339,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5208, - "real_time": 1.3434360577752055e-01, - "cpu_time": 1.3434074481566843e-01, + "iterations": 4934, + "real_time": 1.4218385246392440e-01, + "cpu_time": 1.4217726043777826e-01, "time_unit": "ms" } ] diff --git a/test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log b/test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log index 1d883f92..f1e06fb2 100644 --- a/test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log +++ b/test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log @@ -1,4 +1,4 @@ -2025-06-01T10:10:17+00:00 +2025-09-07T13:13:38+00:00 Running ./bin/image-processing-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,33 +6,33 @@ CPU Caches: L1 Instruction 32 KiB 
(x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.11, 1.81 +Load Average: 2.88, 3.28, 5.00 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. -------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------------------- -Eigen_Convolve2D/1 11.5 ms 11.5 ms 61 -MLIR_Conv2D/1 29.0 ms 29.0 ms 25 -Buddy_Conv2D/1 1.11 ms 1.11 ms 632 -Buddy_Corr2D_Constant_Padding/1 1.74 ms 1.74 ms 400 -OpenCV_Filter2D_Constant_Padding/1 2.68 ms 2.68 ms 262 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.142 ms 0.142 ms 4855 -Buddy_Resize2D_Bilinear_Interpolation/1 0.261 ms 0.261 ms 2692 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 105416 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 49870 -Buddy_Erosion2D_Constant_Padding/1 0.214 ms 0.214 ms 3258 -Buddy_Dilation2D_Constant_Padding/1 0.214 ms 0.214 ms 3246 -Buddy_Opening2D_Constant_Padding/1 0.320 ms 0.320 ms 2260 -Buddy_Closing2D_Constant_Padding/1 0.310 ms 0.310 ms 2223 -Buddy_TopHat2D_Constant_Padding/1 0.806 ms 0.806 ms 827 -Buddy_BottomHat2D_Constant_Padding/1 0.820 ms 0.820 ms 852 -OpenCV_Erode2D_Constant_Padding/1 0.137 ms 0.137 ms 5096 -OpenCV_Opening2D_Constant_Padding/1 0.223 ms 0.223 ms 3136 -OpenCV_Closing2D_Constant_Padding/1 0.227 ms 0.227 ms 3085 -OpenCV_TopHat2D_Constant_Padding/1 0.260 ms 0.260 ms 2693 -OpenCV_BottomHat2D_Constant_Padding/1 0.260 ms 0.260 ms 2686 -OpenCV_MorphGrad2D_Constant_Padding/1 0.254 ms 0.254 ms 2746 -OpenCV_Dilate2D_Constant_Padding/1 0.134 ms 0.134 ms 5208 +Eigen_Convolve2D/1 12.2 ms 12.2 ms 57 +MLIR_Conv2D/1 30.5 ms 30.5 ms 23 +Buddy_Conv2D/1 0.886 ms 0.886 ms 791 +Buddy_Corr2D_Constant_Padding/1 1.84 ms 1.84 ms 381 +OpenCV_Filter2D_Constant_Padding/1 2.82 ms 2.82 ms 251 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 
0.150 ms 0.150 ms 4625 +Buddy_Resize2D_Bilinear_Interpolation/1 0.273 ms 0.273 ms 2572 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 102091 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 48431 +Buddy_Erosion2D_Constant_Padding/1 0.240 ms 0.239 ms 2833 +Buddy_Dilation2D_Constant_Padding/1 0.241 ms 0.241 ms 2959 +Buddy_Opening2D_Constant_Padding/1 0.384 ms 0.384 ms 1787 +Buddy_Closing2D_Constant_Padding/1 0.393 ms 0.393 ms 1757 +Buddy_TopHat2D_Constant_Padding/1 1.01 ms 1.01 ms 664 +Buddy_BottomHat2D_Constant_Padding/1 0.995 ms 0.995 ms 650 +OpenCV_Erode2D_Constant_Padding/1 0.145 ms 0.145 ms 4833 +OpenCV_Opening2D_Constant_Padding/1 0.230 ms 0.230 ms 3097 +OpenCV_Closing2D_Constant_Padding/1 0.227 ms 0.227 ms 3072 +OpenCV_TopHat2D_Constant_Padding/1 0.266 ms 0.266 ms 2632 +OpenCV_BottomHat2D_Constant_Padding/1 0.265 ms 0.265 ms 2644 +OpenCV_MorphGrad2D_Constant_Padding/1 0.268 ms 0.268 ms 2613 +OpenCV_Dilate2D_Constant_Padding/1 0.142 ms 0.142 ms 4934 Saved PNG file. Saved PNG file. Saved PNG file. 
diff --git a/test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json index c408eb70..91f4ace5 100644 --- a/test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json +++ b/test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-06-01T10:10:41+00:00", + "date": "2025-09-07T13:14:03+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1,1.09961,1.78809], + "load_avg": [2.92334,3.25537,4.94238], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 61, - "real_time": 1.1567577933434579e+01, - "cpu_time": 1.1567413278688525e+01, + "iterations": 58, + "real_time": 1.2106177300728600e+01, + "cpu_time": 1.2105513637931036e+01, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 25, - "real_time": 2.9013997018337250e+01, - "cpu_time": 2.9013626159999994e+01, + "iterations": 23, + "real_time": 3.0653080862501394e+01, + "cpu_time": 3.0651176565217387e+01, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 685, - "real_time": 1.0203661655422545e+00, - "cpu_time": 1.0203240744525550e+00, + "iterations": 682, + "real_time": 1.0277371405681208e+00, + "cpu_time": 1.0276775366568915e+00, "time_unit": "ms" }, { @@ -87,9 +87,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 400, - "real_time": 1.7526307236403227e+00, - "cpu_time": 1.7526052500000000e+00, + "iterations": 369, + "real_time": 1.8722422239257068e+00, + "cpu_time": 1.8721983197831980e+00, "time_unit": "ms" }, { @@ 
-101,9 +101,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 261, - "real_time": 2.6768200827398521e+00, - "cpu_time": 2.6767474022988518e+00, + "iterations": 249, + "real_time": 2.8038945303384559e+00, + "cpu_time": 2.8037449116465871e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4858, - "real_time": 1.4230732592463052e-01, - "cpu_time": 1.4230317393989289e-01, + "iterations": 4676, + "real_time": 1.4948565831568836e-01, + "cpu_time": 1.4947427502138574e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2692, - "real_time": 2.6008169433824374e-01, - "cpu_time": 2.6007254606240704e-01, + "iterations": 2573, + "real_time": 2.7347582233494189e-01, + "cpu_time": 2.7346715157403839e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 105372, - "real_time": 6.6475084024833139e-03, - "cpu_time": 6.6474395854686256e-03, + "iterations": 101945, + "real_time": 6.9013301463456663e-03, + "cpu_time": 6.9010407082250192e-03, "time_unit": "ms" }, { @@ -157,9 +157,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 49847, - "real_time": 1.4039105948245419e-02, - "cpu_time": 1.4038696832306864e-02, + "iterations": 48219, + "real_time": 1.4578375478092348e-02, + "cpu_time": 1.4577905348514077e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3249, - "real_time": 2.1369310503521124e-01, - "cpu_time": 2.1368993444136664e-01, + "iterations": 2797, + "real_time": 2.4615478038276398e-01, + "cpu_time": 2.4613685055416532e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3265, - "real_time": 2.1320457811286220e-01, - "cpu_time": 2.1319636539050579e-01, + "iterations": 2766, + "real_time": 
2.5387134588850813e-01, + "cpu_time": 2.5385709797541600e-01, "time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2214, - "real_time": 3.1372998835217769e-01, - "cpu_time": 3.1372075700090379e-01, + "iterations": 1746, + "real_time": 3.8558237450912097e-01, + "cpu_time": 3.8556492955326516e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2229, - "real_time": 3.0766479326717089e-01, - "cpu_time": 3.0765202960969057e-01, + "iterations": 1774, + "real_time": 4.0888474475100989e-01, + "cpu_time": 4.0886437429537814e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 828, - "real_time": 7.8975142020246258e-01, - "cpu_time": 7.8972666304347949e-01, + "iterations": 683, + "real_time": 1.0146626910824728e+00, + "cpu_time": 1.0145995739385083e+00, "time_unit": "ms" }, { @@ -241,9 +241,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 854, - "real_time": 7.7664592811528077e-01, - "cpu_time": 7.7661059836065438e-01, + "iterations": 678, + "real_time": 1.0159070435966362e+00, + "cpu_time": 1.0158491327433619e+00, "time_unit": "ms" }, { @@ -255,9 +255,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5075, - "real_time": 1.3745506518873676e-01, - "cpu_time": 1.3745407389162539e-01, + "iterations": 4842, + "real_time": 1.4470969255627408e-01, + "cpu_time": 1.4470714684014824e-01, "time_unit": "ms" }, { @@ -269,9 +269,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3111, - "real_time": 2.2506565262711115e-01, - "cpu_time": 2.2505757312761160e-01, + "iterations": 3067, + "real_time": 2.2680669984096108e-01, + "cpu_time": 2.2680151613954949e-01, "time_unit": "ms" }, { @@ -284,8 +284,8 @@ "repetition_index": 0, "threads": 1, "iterations": 3056, - "real_time": 2.2886360200677866e-01, - "cpu_time": 
2.2885967277486849e-01, + "real_time": 2.2937234427721401e-01, + "cpu_time": 2.2935791001308917e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2672, - "real_time": 2.6211865430329734e-01, - "cpu_time": 2.6211163136227550e-01, + "iterations": 2685, + "real_time": 2.6044210824886516e-01, + "cpu_time": 2.6042399217877105e-01, "time_unit": "ms" }, { @@ -311,9 +311,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2653, - "real_time": 2.6406221305182687e-01, - "cpu_time": 2.6405938220882053e-01, + "iterations": 2731, + "real_time": 2.6085500406590367e-01, + "cpu_time": 2.6084955108019009e-01, "time_unit": "ms" }, { @@ -325,9 +325,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2750, - "real_time": 2.5381605868989771e-01, - "cpu_time": 2.5380610654545421e-01, + "iterations": 2609, + "real_time": 2.6988330050509018e-01, + "cpu_time": 2.6987879264085829e-01, "time_unit": "ms" }, { @@ -339,9 +339,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5201, - "real_time": 1.3454156696590133e-01, - "cpu_time": 1.3453595366275758e-01, + "iterations": 4928, + "real_time": 1.4262092556780229e-01, + "cpu_time": 1.4261844784902611e-01, "time_unit": "ms" } ] diff --git a/test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log b/test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log index 08741675..4e0b0dbe 100644 --- a/test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log +++ b/test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log @@ -1,4 +1,4 @@ -2025-06-01T10:10:41+00:00 +2025-09-07T13:14:03+00:00 Running ./bin/image-processing-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,33 +6,33 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) 
L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.10, 1.79 +Load Average: 2.92, 3.26, 4.94 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. -------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------------------- -Eigen_Convolve2D/1 11.6 ms 11.6 ms 61 -MLIR_Conv2D/1 29.0 ms 29.0 ms 25 -Buddy_Conv2D/1 1.02 ms 1.02 ms 685 -Buddy_Corr2D_Constant_Padding/1 1.75 ms 1.75 ms 400 -OpenCV_Filter2D_Constant_Padding/1 2.68 ms 2.68 ms 261 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.142 ms 0.142 ms 4858 -Buddy_Resize2D_Bilinear_Interpolation/1 0.260 ms 0.260 ms 2692 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 105372 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 49847 -Buddy_Erosion2D_Constant_Padding/1 0.214 ms 0.214 ms 3249 -Buddy_Dilation2D_Constant_Padding/1 0.213 ms 0.213 ms 3265 -Buddy_Opening2D_Constant_Padding/1 0.314 ms 0.314 ms 2214 -Buddy_Closing2D_Constant_Padding/1 0.308 ms 0.308 ms 2229 -Buddy_TopHat2D_Constant_Padding/1 0.790 ms 0.790 ms 828 -Buddy_BottomHat2D_Constant_Padding/1 0.777 ms 0.777 ms 854 -OpenCV_Erode2D_Constant_Padding/1 0.137 ms 0.137 ms 5075 -OpenCV_Opening2D_Constant_Padding/1 0.225 ms 0.225 ms 3111 +Eigen_Convolve2D/1 12.1 ms 12.1 ms 58 +MLIR_Conv2D/1 30.7 ms 30.7 ms 23 +Buddy_Conv2D/1 1.03 ms 1.03 ms 682 +Buddy_Corr2D_Constant_Padding/1 1.87 ms 1.87 ms 369 +OpenCV_Filter2D_Constant_Padding/1 2.80 ms 2.80 ms 249 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.149 ms 0.149 ms 4676 +Buddy_Resize2D_Bilinear_Interpolation/1 0.273 ms 0.273 ms 2573 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 101945 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 48219 +Buddy_Erosion2D_Constant_Padding/1 0.246 ms 0.246 ms 2797 +Buddy_Dilation2D_Constant_Padding/1 0.254 ms 
0.254 ms 2766 +Buddy_Opening2D_Constant_Padding/1 0.386 ms 0.386 ms 1746 +Buddy_Closing2D_Constant_Padding/1 0.409 ms 0.409 ms 1774 +Buddy_TopHat2D_Constant_Padding/1 1.01 ms 1.01 ms 683 +Buddy_BottomHat2D_Constant_Padding/1 1.02 ms 1.02 ms 678 +OpenCV_Erode2D_Constant_Padding/1 0.145 ms 0.145 ms 4842 +OpenCV_Opening2D_Constant_Padding/1 0.227 ms 0.227 ms 3067 OpenCV_Closing2D_Constant_Padding/1 0.229 ms 0.229 ms 3056 -OpenCV_TopHat2D_Constant_Padding/1 0.262 ms 0.262 ms 2672 -OpenCV_BottomHat2D_Constant_Padding/1 0.264 ms 0.264 ms 2653 -OpenCV_MorphGrad2D_Constant_Padding/1 0.254 ms 0.254 ms 2750 -OpenCV_Dilate2D_Constant_Padding/1 0.135 ms 0.135 ms 5201 +OpenCV_TopHat2D_Constant_Padding/1 0.260 ms 0.260 ms 2685 +OpenCV_BottomHat2D_Constant_Padding/1 0.261 ms 0.261 ms 2731 +OpenCV_MorphGrad2D_Constant_Padding/1 0.270 ms 0.270 ms 2609 +OpenCV_Dilate2D_Constant_Padding/1 0.143 ms 0.143 ms 4928 Saved PNG file. Saved PNG file. Saved PNG file. diff --git a/test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json index 5f3d8f41..481e741d 100644 --- a/test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json +++ b/test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-06-01T10:05:28+00:00", + "date": "2025-09-07T13:08:44+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.18652,1.28955,2.11279], + "load_avg": [3.36963,4.10254,5.83887], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 144, - "real_time": 4.8743747304090199e+00, - "cpu_time": 4.8743142152777779e+00, + "iterations": 141, + 
"real_time": 4.9913290703127569e+00, + "cpu_time": 4.9912880992907809e+00, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 95, - "real_time": 7.3758271963972790e+00, - "cpu_time": 7.3755260105263138e+00, + "iterations": 93, + "real_time": 7.5341148642442564e+00, + "cpu_time": 7.5340402903225794e+00, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2241, - "real_time": 3.1217087819899686e-01, - "cpu_time": 3.1216030700580105e-01, + "iterations": 2162, + "real_time": 3.2283163781964480e-01, + "cpu_time": 3.2282717576318221e-01, "time_unit": "ms" }, { @@ -87,9 +87,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 849, - "real_time": 8.2071478949208143e-01, - "cpu_time": 8.2067061248527651e-01, + "iterations": 841, + "real_time": 8.3219741474433973e-01, + "cpu_time": 8.3218935790725346e-01, "time_unit": "ms" }, { @@ -101,9 +101,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 547, - "real_time": 1.2778125710145212e+00, - "cpu_time": 1.2777362102376604e+00, + "iterations": 536, + "real_time": 1.3002738662397684e+00, + "cpu_time": 1.3002208395522381e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4774, - "real_time": 1.4609858747938462e-01, - "cpu_time": 1.4609281943862598e-01, + "iterations": 4661, + "real_time": 1.4891540657627406e-01, + "cpu_time": 1.4890615962239859e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2627, - "real_time": 2.6652880571711685e-01, - "cpu_time": 2.6652126570232204e-01, + "iterations": 2568, + "real_time": 2.7335639259664812e-01, + "cpu_time": 2.7334750467289703e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 103069, - "real_time": 6.7016581119360014e-03, - 
"cpu_time": 6.7014803869252636e-03, + "iterations": 101674, + "real_time": 6.8376584089808075e-03, + "cpu_time": 6.8374261954875460e-03, "time_unit": "ms" }, { @@ -157,9 +157,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 49911, - "real_time": 1.4021569295595974e-02, - "cpu_time": 1.4021311073711216e-02, + "iterations": 48511, + "real_time": 1.4459907311912134e-02, + "cpu_time": 1.4459377027890576e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3259, - "real_time": 2.1410988367712180e-01, - "cpu_time": 2.1410759803620766e-01, + "iterations": 2688, + "real_time": 2.5685550090635106e-01, + "cpu_time": 2.5684523921130975e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3230, - "real_time": 2.1453265723521495e-01, - "cpu_time": 2.1452785417956655e-01, + "iterations": 2734, + "real_time": 2.6675885585467135e-01, + "cpu_time": 2.6675183504023386e-01, "time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2255, - "real_time": 3.0947743921216470e-01, - "cpu_time": 3.0946956718403540e-01, + "iterations": 1483, + "real_time": 4.7402130094478040e-01, + "cpu_time": 4.7401253742414040e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2273, - "real_time": 3.0895578070616231e-01, - "cpu_time": 3.0893229960404744e-01, + "iterations": 1350, + "real_time": 3.9245121732906058e-01, + "cpu_time": 3.9244753037037028e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 855, - "real_time": 7.7638528936090523e-01, - "cpu_time": 7.7633451111111051e-01, + "iterations": 666, + "real_time": 1.0719478924621690e+00, + "cpu_time": 1.0719183033033035e+00, "time_unit": "ms" }, { @@ -241,9 +241,9 @@ "repetitions": 1, "repetition_index": 0, 
"threads": 1, - "iterations": 856, - "real_time": 7.7369363290893145e-01, - "cpu_time": 7.7366387383177460e-01, + "iterations": 593, + "real_time": 1.1050101011539188e+00, + "cpu_time": 1.1049833305227643e+00, "time_unit": "ms" }, { @@ -255,9 +255,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5148, - "real_time": 1.3578200977651667e-01, - "cpu_time": 1.3577912917637908e-01, + "iterations": 4812, + "real_time": 1.4599609937354713e-01, + "cpu_time": 1.4599082543640898e-01, "time_unit": "ms" }, { @@ -269,9 +269,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3185, - "real_time": 2.1949250724757297e-01, - "cpu_time": 2.1948565588697025e-01, + "iterations": 3117, + "real_time": 2.1899228256445571e-01, + "cpu_time": 2.1898928424767403e-01, "time_unit": "ms" }, { @@ -283,9 +283,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3143, - "real_time": 2.2253769026453465e-01, - "cpu_time": 2.2253109035952864e-01, + "iterations": 3169, + "real_time": 2.2265837701497751e-01, + "cpu_time": 2.2265226285894635e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2699, - "real_time": 2.5904624301577195e-01, - "cpu_time": 2.5903694368284486e-01, + "iterations": 2719, + "real_time": 2.5787956823580138e-01, + "cpu_time": 2.5787464913571129e-01, "time_unit": "ms" }, { @@ -311,9 +311,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2714, - "real_time": 2.5754975938761859e-01, - "cpu_time": 2.5754484340456885e-01, + "iterations": 2731, + "real_time": 2.5586019458023457e-01, + "cpu_time": 2.5585493079458066e-01, "time_unit": "ms" }, { @@ -325,9 +325,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2791, - "real_time": 2.5068771725421052e-01, - "cpu_time": 2.5067944965962069e-01, + "iterations": 2598, + "real_time": 2.7497063137826044e-01, + "cpu_time": 2.7496818745188600e-01, "time_unit": 
"ms" }, { @@ -339,9 +339,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5112, - "real_time": 1.3699782254387141e-01, - "cpu_time": 1.3699192468701091e-01, + "iterations": 4696, + "real_time": 1.4943660600221950e-01, + "cpu_time": 1.4943375085178881e-01, "time_unit": "ms" } ] diff --git a/test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log b/test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log index f3a9e94d..439cfbbd 100644 --- a/test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log +++ b/test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log @@ -1,4 +1,4 @@ -2025-06-01T10:05:28+00:00 +2025-09-07T13:08:44+00:00 Running ./bin/image-processing-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,33 +6,33 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.19, 1.29, 2.11 +Load Average: 3.37, 4.10, 5.84 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
-------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------------------- -Eigen_Convolve2D/1 4.87 ms 4.87 ms 144 -MLIR_Conv2D/1 7.38 ms 7.38 ms 95 -Buddy_Conv2D/1 0.312 ms 0.312 ms 2241 -Buddy_Corr2D_Constant_Padding/1 0.821 ms 0.821 ms 849 -OpenCV_Filter2D_Constant_Padding/1 1.28 ms 1.28 ms 547 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.146 ms 0.146 ms 4774 -Buddy_Resize2D_Bilinear_Interpolation/1 0.267 ms 0.267 ms 2627 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 103069 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 49911 -Buddy_Erosion2D_Constant_Padding/1 0.214 ms 0.214 ms 3259 -Buddy_Dilation2D_Constant_Padding/1 0.215 ms 0.215 ms 3230 -Buddy_Opening2D_Constant_Padding/1 0.309 ms 0.309 ms 2255 -Buddy_Closing2D_Constant_Padding/1 0.309 ms 0.309 ms 2273 -Buddy_TopHat2D_Constant_Padding/1 0.776 ms 0.776 ms 855 -Buddy_BottomHat2D_Constant_Padding/1 0.774 ms 0.774 ms 856 -OpenCV_Erode2D_Constant_Padding/1 0.136 ms 0.136 ms 5148 -OpenCV_Opening2D_Constant_Padding/1 0.219 ms 0.219 ms 3185 -OpenCV_Closing2D_Constant_Padding/1 0.223 ms 0.223 ms 3143 -OpenCV_TopHat2D_Constant_Padding/1 0.259 ms 0.259 ms 2699 -OpenCV_BottomHat2D_Constant_Padding/1 0.258 ms 0.258 ms 2714 -OpenCV_MorphGrad2D_Constant_Padding/1 0.251 ms 0.251 ms 2791 -OpenCV_Dilate2D_Constant_Padding/1 0.137 ms 0.137 ms 5112 +Eigen_Convolve2D/1 4.99 ms 4.99 ms 141 +MLIR_Conv2D/1 7.53 ms 7.53 ms 93 +Buddy_Conv2D/1 0.323 ms 0.323 ms 2162 +Buddy_Corr2D_Constant_Padding/1 0.832 ms 0.832 ms 841 +OpenCV_Filter2D_Constant_Padding/1 1.30 ms 1.30 ms 536 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.149 ms 0.149 ms 4661 +Buddy_Resize2D_Bilinear_Interpolation/1 0.273 ms 0.273 ms 2568 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 101674 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 48511 
+Buddy_Erosion2D_Constant_Padding/1 0.257 ms 0.257 ms 2688 +Buddy_Dilation2D_Constant_Padding/1 0.267 ms 0.267 ms 2734 +Buddy_Opening2D_Constant_Padding/1 0.474 ms 0.474 ms 1483 +Buddy_Closing2D_Constant_Padding/1 0.392 ms 0.392 ms 1350 +Buddy_TopHat2D_Constant_Padding/1 1.07 ms 1.07 ms 666 +Buddy_BottomHat2D_Constant_Padding/1 1.11 ms 1.10 ms 593 +OpenCV_Erode2D_Constant_Padding/1 0.146 ms 0.146 ms 4812 +OpenCV_Opening2D_Constant_Padding/1 0.219 ms 0.219 ms 3117 +OpenCV_Closing2D_Constant_Padding/1 0.223 ms 0.223 ms 3169 +OpenCV_TopHat2D_Constant_Padding/1 0.258 ms 0.258 ms 2719 +OpenCV_BottomHat2D_Constant_Padding/1 0.256 ms 0.256 ms 2731 +OpenCV_MorphGrad2D_Constant_Padding/1 0.275 ms 0.275 ms 2598 +OpenCV_Dilate2D_Constant_Padding/1 0.149 ms 0.149 ms 4696 Saved PNG file. Saved PNG file. Saved PNG file. diff --git a/test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json index 32a5db2d..2b2ab04c 100644 --- a/test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json +++ b/test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-06-01T10:05:52+00:00", + "date": "2025-09-07T13:09:08+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.1958,1.28613,2.09277], + "load_avg": [3.19043,3.99707,5.75732], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 144, - "real_time": 4.8990769104825125e+00, - "cpu_time": 4.8990283611111103e+00, + "iterations": 135, + "real_time": 5.1618975345735194e+00, + "cpu_time": 5.1616230666666665e+00, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, 
"repetition_index": 0, "threads": 1, - "iterations": 97, - "real_time": 7.2054376960107964e+00, - "cpu_time": 7.2053482680412344e+00, + "iterations": 94, + "real_time": 7.6303360389268144e+00, + "cpu_time": 7.6299276276595762e+00, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2257, - "real_time": 3.1050090547230164e-01, - "cpu_time": 3.1049665263624282e-01, + "iterations": 1898, + "real_time": 3.6347741055036370e-01, + "cpu_time": 3.6347207218124333e-01, "time_unit": "ms" }, { @@ -87,9 +87,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 878, - "real_time": 7.9843645877202718e-01, - "cpu_time": 7.9842614464692518e-01, + "iterations": 821, + "real_time": 8.4532172522359006e-01, + "cpu_time": 8.4524135322777105e-01, "time_unit": "ms" }, { @@ -101,9 +101,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 560, - "real_time": 1.2482626529942666e+00, - "cpu_time": 1.2482019642857143e+00, + "iterations": 535, + "real_time": 1.3060625816617057e+00, + "cpu_time": 1.3060270074766349e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4872, - "real_time": 1.4268146057067246e-01, - "cpu_time": 1.4267865578817737e-01, + "iterations": 4701, + "real_time": 1.4897938473025324e-01, + "cpu_time": 1.4897103637523929e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2654, - "real_time": 2.6207054195295354e-01, - "cpu_time": 2.6205789299171084e-01, + "iterations": 2567, + "real_time": 2.7154003791322401e-01, + "cpu_time": 2.7153190027269208e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 105278, - "real_time": 6.6562565302057996e-03, - "cpu_time": 6.6561646592830431e-03, + "iterations": 102141, + "real_time": 6.8959888496122178e-03, + "cpu_time": 
6.8956638861965335e-03, "time_unit": "ms" }, { @@ -157,9 +157,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 49913, - "real_time": 1.4036607057034684e-02, - "cpu_time": 1.4036086510528307e-02, + "iterations": 48431, + "real_time": 1.4446364637891266e-02, + "cpu_time": 1.4446001445355251e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3221, - "real_time": 2.1425787346753894e-01, - "cpu_time": 2.1425355448618436e-01, + "iterations": 2834, + "real_time": 2.4457907615704022e-01, + "cpu_time": 2.4455411467889898e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3271, - "real_time": 2.1439506175484507e-01, - "cpu_time": 2.1438375817792729e-01, + "iterations": 2817, + "real_time": 2.5071918747507177e-01, + "cpu_time": 2.5071165566205178e-01, "time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2213, - "real_time": 3.1620805003203473e-01, - "cpu_time": 3.1619105106190648e-01, + "iterations": 1763, + "real_time": 3.8233273665268042e-01, + "cpu_time": 3.8232422007941014e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2230, - "real_time": 3.1144172203781356e-01, - "cpu_time": 3.1142306591928215e-01, + "iterations": 1772, + "real_time": 3.9655546832232658e-01, + "cpu_time": 3.9653353950338632e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 866, - "real_time": 8.0038771876424342e-01, - "cpu_time": 8.0036796073903105e-01, + "iterations": 651, + "real_time": 1.0213724948385710e+00, + "cpu_time": 1.0213337788018413e+00, "time_unit": "ms" }, { @@ -241,9 +241,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 846, - "real_time": 7.9733535127789135e-01, - "cpu_time": 7.9729864420803698e-01, + 
"iterations": 675, + "real_time": 1.0147930460947532e+00, + "cpu_time": 1.0147491318518496e+00, "time_unit": "ms" }, { @@ -255,9 +255,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5058, - "real_time": 1.3818436612685522e-01, - "cpu_time": 1.3818210241202053e-01, + "iterations": 4248, + "real_time": 1.6329018223729061e-01, + "cpu_time": 1.6328074152542363e-01, "time_unit": "ms" }, { @@ -269,9 +269,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3149, - "real_time": 2.2202210187930915e-01, - "cpu_time": 2.2201345633534444e-01, + "iterations": 2850, + "real_time": 2.4692150323014511e-01, + "cpu_time": 2.4691329473684170e-01, "time_unit": "ms" }, { @@ -283,9 +283,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3169, - "real_time": 2.2069638393379679e-01, - "cpu_time": 2.2069076932786405e-01, + "iterations": 2844, + "real_time": 2.5029316460700646e-01, + "cpu_time": 2.5028627601969067e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2725, - "real_time": 2.5712088160558577e-01, - "cpu_time": 2.5710837284403609e-01, + "iterations": 2495, + "real_time": 2.8614350603912064e-01, + "cpu_time": 2.8613096152304607e-01, "time_unit": "ms" }, { @@ -311,9 +311,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2715, - "real_time": 2.5740468919167420e-01, - "cpu_time": 2.5739238526703495e-01, + "iterations": 2450, + "real_time": 2.8474553811306857e-01, + "cpu_time": 2.8473111061224476e-01, "time_unit": "ms" }, { @@ -325,9 +325,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2798, - "real_time": 2.5014725085707373e-01, - "cpu_time": 2.5013296140099989e-01, + "iterations": 2455, + "real_time": 2.8508260481954834e-01, + "cpu_time": 2.8507377230142528e-01, "time_unit": "ms" }, { @@ -339,9 +339,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5116, - 
"real_time": 1.3708527417645219e-01, - "cpu_time": 1.3708149824081331e-01, + "iterations": 4286, + "real_time": 1.6393232873345001e-01, + "cpu_time": 1.6392709612692480e-01, "time_unit": "ms" } ] diff --git a/test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log b/test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log index 875f2bd7..4a4578d7 100644 --- a/test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log +++ b/test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log @@ -1,4 +1,4 @@ -2025-06-01T10:05:52+00:00 +2025-09-07T13:09:08+00:00 Running ./bin/image-processing-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,33 +6,33 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.20, 1.29, 2.09 +Load Average: 3.19, 4.00, 5.76 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
-------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------------------- -Eigen_Convolve2D/1 4.90 ms 4.90 ms 144 -MLIR_Conv2D/1 7.21 ms 7.21 ms 97 -Buddy_Conv2D/1 0.311 ms 0.310 ms 2257 -Buddy_Corr2D_Constant_Padding/1 0.798 ms 0.798 ms 878 -OpenCV_Filter2D_Constant_Padding/1 1.25 ms 1.25 ms 560 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.143 ms 0.143 ms 4872 -Buddy_Resize2D_Bilinear_Interpolation/1 0.262 ms 0.262 ms 2654 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 105278 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 49913 -Buddy_Erosion2D_Constant_Padding/1 0.214 ms 0.214 ms 3221 -Buddy_Dilation2D_Constant_Padding/1 0.214 ms 0.214 ms 3271 -Buddy_Opening2D_Constant_Padding/1 0.316 ms 0.316 ms 2213 -Buddy_Closing2D_Constant_Padding/1 0.311 ms 0.311 ms 2230 -Buddy_TopHat2D_Constant_Padding/1 0.800 ms 0.800 ms 866 -Buddy_BottomHat2D_Constant_Padding/1 0.797 ms 0.797 ms 846 -OpenCV_Erode2D_Constant_Padding/1 0.138 ms 0.138 ms 5058 -OpenCV_Opening2D_Constant_Padding/1 0.222 ms 0.222 ms 3149 -OpenCV_Closing2D_Constant_Padding/1 0.221 ms 0.221 ms 3169 -OpenCV_TopHat2D_Constant_Padding/1 0.257 ms 0.257 ms 2725 -OpenCV_BottomHat2D_Constant_Padding/1 0.257 ms 0.257 ms 2715 -OpenCV_MorphGrad2D_Constant_Padding/1 0.250 ms 0.250 ms 2798 -OpenCV_Dilate2D_Constant_Padding/1 0.137 ms 0.137 ms 5116 +Eigen_Convolve2D/1 5.16 ms 5.16 ms 135 +MLIR_Conv2D/1 7.63 ms 7.63 ms 94 +Buddy_Conv2D/1 0.363 ms 0.363 ms 1898 +Buddy_Corr2D_Constant_Padding/1 0.845 ms 0.845 ms 821 +OpenCV_Filter2D_Constant_Padding/1 1.31 ms 1.31 ms 535 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.149 ms 0.149 ms 4701 +Buddy_Resize2D_Bilinear_Interpolation/1 0.272 ms 0.272 ms 2567 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 102141 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 48431 
+Buddy_Erosion2D_Constant_Padding/1 0.245 ms 0.245 ms 2834 +Buddy_Dilation2D_Constant_Padding/1 0.251 ms 0.251 ms 2817 +Buddy_Opening2D_Constant_Padding/1 0.382 ms 0.382 ms 1763 +Buddy_Closing2D_Constant_Padding/1 0.397 ms 0.397 ms 1772 +Buddy_TopHat2D_Constant_Padding/1 1.02 ms 1.02 ms 651 +Buddy_BottomHat2D_Constant_Padding/1 1.01 ms 1.01 ms 675 +OpenCV_Erode2D_Constant_Padding/1 0.163 ms 0.163 ms 4248 +OpenCV_Opening2D_Constant_Padding/1 0.247 ms 0.247 ms 2850 +OpenCV_Closing2D_Constant_Padding/1 0.250 ms 0.250 ms 2844 +OpenCV_TopHat2D_Constant_Padding/1 0.286 ms 0.286 ms 2495 +OpenCV_BottomHat2D_Constant_Padding/1 0.285 ms 0.285 ms 2450 +OpenCV_MorphGrad2D_Constant_Padding/1 0.285 ms 0.285 ms 2455 +OpenCV_Dilate2D_Constant_Padding/1 0.164 ms 0.164 ms 4286 Saved PNG file. Saved PNG file. Saved PNG file. diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json index 7f40b284..0a1d1b28 100644 --- a/test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json +++ b/test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-06-01T10:06:16+00:00", + "date": "2025-09-07T13:09:33+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.12793,1.26172,2.06299], + "load_avg": [3.04053,3.89941,5.67725], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 147, - "real_time": 4.7720991151065242e+00, - "cpu_time": 4.7720388095238100e+00, + "iterations": 137, + "real_time": 5.1388616000648835e+00, + "cpu_time": 5.1386628175182478e+00, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, 
"repetition_index": 0, "threads": 1, - "iterations": 97, - "real_time": 7.1972473956567722e+00, - "cpu_time": 7.1971010103092770e+00, + "iterations": 92, + "real_time": 7.5730732844575588e+00, + "cpu_time": 7.5728530760869583e+00, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2252, - "real_time": 3.1011967074463465e-01, - "cpu_time": 3.1011620293072806e-01, + "iterations": 2206, + "real_time": 3.2243556288135994e-01, + "cpu_time": 3.2241800589301906e-01, "time_unit": "ms" }, { @@ -87,9 +87,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 868, - "real_time": 8.0242218913227181e-01, - "cpu_time": 8.0239820852534560e-01, + "iterations": 826, + "real_time": 8.3711696450415884e-01, + "cpu_time": 8.3709625423728795e-01, "time_unit": "ms" }, { @@ -101,9 +101,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 560, - "real_time": 1.2489744295765246e+00, - "cpu_time": 1.2489337464285726e+00, + "iterations": 531, + "real_time": 1.3111124899091020e+00, + "cpu_time": 1.3110567871939727e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4854, - "real_time": 1.4281208113873500e-01, - "cpu_time": 1.4280516625463535e-01, + "iterations": 4648, + "real_time": 1.4950641512665608e-01, + "cpu_time": 1.4950070998278833e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2649, - "real_time": 2.6116495907711684e-01, - "cpu_time": 2.6115319365798423e-01, + "iterations": 2560, + "real_time": 2.7398462261771783e-01, + "cpu_time": 2.7396728242187507e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 105099, - "real_time": 6.6859832037041806e-03, - "cpu_time": 6.6857738322914631e-03, + "iterations": 102196, + "real_time": 6.9250278208413379e-03, + "cpu_time": 
6.9248903968844146e-03, "time_unit": "ms" }, { @@ -157,9 +157,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 49931, - "real_time": 1.4034184800308621e-02, - "cpu_time": 1.4033786765736724e-02, + "iterations": 45300, + "real_time": 1.4499240281434511e-02, + "cpu_time": 1.4498946423841065e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3253, - "real_time": 2.1422339406905452e-01, - "cpu_time": 2.1422094743313869e-01, + "iterations": 2831, + "real_time": 2.4162875557484789e-01, + "cpu_time": 2.4161129918756594e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3245, - "real_time": 2.1412110670781098e-01, - "cpu_time": 2.1411618335901397e-01, + "iterations": 2893, + "real_time": 2.4165965119654276e-01, + "cpu_time": 2.4165194745938479e-01, "time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2229, - "real_time": 3.1633685711067638e-01, - "cpu_time": 3.1632788515029109e-01, + "iterations": 1793, + "real_time": 3.9718762389189427e-01, + "cpu_time": 3.9718364082543139e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2256, - "real_time": 3.1315658740197300e-01, - "cpu_time": 3.1313880939716321e-01, + "iterations": 1854, + "real_time": 3.9794566280569205e-01, + "cpu_time": 3.9791046979503841e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 822, - "real_time": 8.0446341050077241e-01, - "cpu_time": 8.0443746593674081e-01, + "iterations": 667, + "real_time": 1.0182474372626424e+00, + "cpu_time": 1.0181817661169434e+00, "time_unit": "ms" }, { @@ -241,9 +241,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 842, - "real_time": 7.9947983461151217e-01, - "cpu_time": 7.9944346437054481e-01, + 
"iterations": 648, + "real_time": 1.0037174172423504e+00, + "cpu_time": 1.0036660324074091e+00, "time_unit": "ms" }, { @@ -255,9 +255,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5153, - "real_time": 1.3539784398329691e-01, - "cpu_time": 1.3539321715505537e-01, + "iterations": 4893, + "real_time": 1.4206621359638610e-01, + "cpu_time": 1.4206494400163472e-01, "time_unit": "ms" }, { @@ -269,9 +269,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3158, - "real_time": 2.1987897889515204e-01, - "cpu_time": 2.1986695028499065e-01, + "iterations": 3067, + "real_time": 2.2388874809128631e-01, + "cpu_time": 2.2388449331594412e-01, "time_unit": "ms" }, { @@ -283,9 +283,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3163, - "real_time": 2.2139365028400343e-01, - "cpu_time": 2.2138768827062899e-01, + "iterations": 3150, + "real_time": 2.2265229433301895e-01, + "cpu_time": 2.2264321619047678e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2727, - "real_time": 2.5654320311734297e-01, - "cpu_time": 2.5653315914924757e-01, + "iterations": 2705, + "real_time": 2.5939554141321375e-01, + "cpu_time": 2.5939025988909481e-01, "time_unit": "ms" }, { @@ -311,9 +311,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2742, - "real_time": 2.5476464549287403e-01, - "cpu_time": 2.5475962800875301e-01, + "iterations": 2695, + "real_time": 2.5867627785024483e-01, + "cpu_time": 2.5867045009276496e-01, "time_unit": "ms" }, { @@ -325,9 +325,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2826, - "real_time": 2.4769082797265440e-01, - "cpu_time": 2.4768630750176840e-01, + "iterations": 2673, + "real_time": 2.6060673423889108e-01, + "cpu_time": 2.6059759932659970e-01, "time_unit": "ms" }, { @@ -339,9 +339,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5148, - 
"real_time": 1.3564416607545157e-01, - "cpu_time": 1.3563850446775436e-01, + "iterations": 4862, + "real_time": 1.4264437518263001e-01, + "cpu_time": 1.4263860037021783e-01, "time_unit": "ms" } ] diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log b/test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log index 9b6ab275..f52a2204 100644 --- a/test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log +++ b/test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log @@ -1,4 +1,4 @@ -2025-06-01T10:06:16+00:00 +2025-09-07T13:09:33+00:00 Running ./bin/image-processing-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,33 +6,33 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.13, 1.26, 2.06 +Load Average: 3.04, 3.90, 5.68 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
-------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------------------- -Eigen_Convolve2D/1 4.77 ms 4.77 ms 147 -MLIR_Conv2D/1 7.20 ms 7.20 ms 97 -Buddy_Conv2D/1 0.310 ms 0.310 ms 2252 -Buddy_Corr2D_Constant_Padding/1 0.802 ms 0.802 ms 868 -OpenCV_Filter2D_Constant_Padding/1 1.25 ms 1.25 ms 560 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.143 ms 0.143 ms 4854 -Buddy_Resize2D_Bilinear_Interpolation/1 0.261 ms 0.261 ms 2649 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 105099 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 49931 -Buddy_Erosion2D_Constant_Padding/1 0.214 ms 0.214 ms 3253 -Buddy_Dilation2D_Constant_Padding/1 0.214 ms 0.214 ms 3245 -Buddy_Opening2D_Constant_Padding/1 0.316 ms 0.316 ms 2229 -Buddy_Closing2D_Constant_Padding/1 0.313 ms 0.313 ms 2256 -Buddy_TopHat2D_Constant_Padding/1 0.804 ms 0.804 ms 822 -Buddy_BottomHat2D_Constant_Padding/1 0.799 ms 0.799 ms 842 -OpenCV_Erode2D_Constant_Padding/1 0.135 ms 0.135 ms 5153 -OpenCV_Opening2D_Constant_Padding/1 0.220 ms 0.220 ms 3158 -OpenCV_Closing2D_Constant_Padding/1 0.221 ms 0.221 ms 3163 -OpenCV_TopHat2D_Constant_Padding/1 0.257 ms 0.257 ms 2727 -OpenCV_BottomHat2D_Constant_Padding/1 0.255 ms 0.255 ms 2742 -OpenCV_MorphGrad2D_Constant_Padding/1 0.248 ms 0.248 ms 2826 -OpenCV_Dilate2D_Constant_Padding/1 0.136 ms 0.136 ms 5148 +Eigen_Convolve2D/1 5.14 ms 5.14 ms 137 +MLIR_Conv2D/1 7.57 ms 7.57 ms 92 +Buddy_Conv2D/1 0.322 ms 0.322 ms 2206 +Buddy_Corr2D_Constant_Padding/1 0.837 ms 0.837 ms 826 +OpenCV_Filter2D_Constant_Padding/1 1.31 ms 1.31 ms 531 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.150 ms 0.150 ms 4648 +Buddy_Resize2D_Bilinear_Interpolation/1 0.274 ms 0.274 ms 2560 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 102196 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 45300 
+Buddy_Erosion2D_Constant_Padding/1 0.242 ms 0.242 ms 2831 +Buddy_Dilation2D_Constant_Padding/1 0.242 ms 0.242 ms 2893 +Buddy_Opening2D_Constant_Padding/1 0.397 ms 0.397 ms 1793 +Buddy_Closing2D_Constant_Padding/1 0.398 ms 0.398 ms 1854 +Buddy_TopHat2D_Constant_Padding/1 1.02 ms 1.02 ms 667 +Buddy_BottomHat2D_Constant_Padding/1 1.00 ms 1.00 ms 648 +OpenCV_Erode2D_Constant_Padding/1 0.142 ms 0.142 ms 4893 +OpenCV_Opening2D_Constant_Padding/1 0.224 ms 0.224 ms 3067 +OpenCV_Closing2D_Constant_Padding/1 0.223 ms 0.223 ms 3150 +OpenCV_TopHat2D_Constant_Padding/1 0.259 ms 0.259 ms 2705 +OpenCV_BottomHat2D_Constant_Padding/1 0.259 ms 0.259 ms 2695 +OpenCV_MorphGrad2D_Constant_Padding/1 0.261 ms 0.261 ms 2673 +OpenCV_Dilate2D_Constant_Padding/1 0.143 ms 0.143 ms 4862 Saved PNG file. Saved PNG file. Saved PNG file. diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json index 6ff72a1b..64dd32da 100644 --- a/test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json +++ b/test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-06-01T10:06:40+00:00", + "date": "2025-09-07T13:09:57+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.0835,1.23975,2.03369], + "load_avg": [2.94531,3.80908,5.59961], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 143, - "real_time": 4.9128080555400651e+00, - "cpu_time": 4.9125261748251745e+00, + "iterations": 134, + "real_time": 5.1393594815215069e+00, + "cpu_time": 5.1390401492537316e+00, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 
1, "repetition_index": 0, "threads": 1, - "iterations": 98, - "real_time": 7.1688128103102953e+00, - "cpu_time": 7.1684497448979574e+00, + "iterations": 91, + "real_time": 7.5410613483125033e+00, + "cpu_time": 7.5408332637362649e+00, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2260, - "real_time": 3.0970742464461160e-01, - "cpu_time": 3.0970234823008863e-01, + "iterations": 2173, + "real_time": 3.2154843896915436e-01, + "cpu_time": 3.2153552692130705e-01, "time_unit": "ms" }, { @@ -87,9 +87,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 875, - "real_time": 7.9474829563072746e-01, - "cpu_time": 7.9473616228571387e-01, + "iterations": 829, + "real_time": 8.4328651877594230e-01, + "cpu_time": 8.4327024969843234e-01, "time_unit": "ms" }, { @@ -101,9 +101,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 560, - "real_time": 1.2486339812832219e+00, - "cpu_time": 1.2485831892857155e+00, + "iterations": 532, + "real_time": 1.3073873881222611e+00, + "cpu_time": 1.3073464718045120e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4871, - "real_time": 1.4330328636604078e-01, - "cpu_time": 1.4329935413672748e-01, + "iterations": 4585, + "real_time": 1.5132041605374286e-01, + "cpu_time": 1.5102619302071968e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2651, - "real_time": 2.6097617655576555e-01, - "cpu_time": 2.6096452583930574e-01, + "iterations": 2573, + "real_time": 2.7366480878758792e-01, + "cpu_time": 2.7364495919160520e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 104620, - "real_time": 6.6914885511215544e-03, - "cpu_time": 6.6913837507168832e-03, + "iterations": 101803, + "real_time": 6.8592732155106284e-03, + "cpu_time": 
6.8590132805516511e-03, "time_unit": "ms" }, { @@ -157,9 +157,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 49783, - "real_time": 1.4034632352513720e-02, - "cpu_time": 1.4034190526886668e-02, + "iterations": 48122, + "real_time": 1.4427633615687751e-02, + "cpu_time": 1.4427098915257043e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3101, - "real_time": 2.1835735265342160e-01, - "cpu_time": 2.1835113382779719e-01, + "iterations": 2888, + "real_time": 2.4509608972600952e-01, + "cpu_time": 2.4508256613573420e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3270, - "real_time": 2.1373488703907811e-01, - "cpu_time": 2.1372451253822639e-01, + "iterations": 2928, + "real_time": 2.4123604490823758e-01, + "cpu_time": 2.4122515266393438e-01, "time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2180, - "real_time": 3.1882228346866204e-01, - "cpu_time": 3.1881019403669730e-01, + "iterations": 1789, + "real_time": 3.8186785993528072e-01, + "cpu_time": 3.8185716601453323e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2262, - "real_time": 3.1177783887224003e-01, - "cpu_time": 3.1175983333333290e-01, + "iterations": 1769, + "real_time": 3.9249769353543112e-01, + "cpu_time": 3.9247566817410967e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 841, - "real_time": 8.1441226314562820e-01, - "cpu_time": 8.1438480975029681e-01, + "iterations": 664, + "real_time": 1.0108141711049050e+00, + "cpu_time": 1.0107552921686753e+00, "time_unit": "ms" }, { @@ -241,9 +241,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 849, - "real_time": 8.1958065360117571e-01, - "cpu_time": 8.1955526266195466e-01, + 
"iterations": 673, + "real_time": 9.8877163246871813e-01, + "cpu_time": 9.8869461069836506e-01, "time_unit": "ms" }, { @@ -255,9 +255,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5157, - "real_time": 1.3543392134447857e-01, - "cpu_time": 1.3543092495636982e-01, + "iterations": 4863, + "real_time": 1.4223833171006905e-01, + "cpu_time": 1.4223450503804233e-01, "time_unit": "ms" }, { @@ -269,9 +269,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3187, - "real_time": 2.1921325934355934e-01, - "cpu_time": 2.1920577282711085e-01, + "iterations": 3100, + "real_time": 2.4483322376205074e-01, + "cpu_time": 2.4482809935483879e-01, "time_unit": "ms" }, { @@ -283,9 +283,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3207, - "real_time": 2.1817837297303080e-01, - "cpu_time": 2.1817245837231020e-01, + "iterations": 3060, + "real_time": 2.2852179880430495e-01, + "cpu_time": 2.2851666633986922e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2745, - "real_time": 2.5516484061659794e-01, - "cpu_time": 2.5515719052823399e-01, + "iterations": 2675, + "real_time": 2.5958791375160217e-01, + "cpu_time": 2.5957632560747695e-01, "time_unit": "ms" }, { @@ -311,9 +311,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2766, - "real_time": 2.5253382525578499e-01, - "cpu_time": 2.5252827476500311e-01, + "iterations": 2709, + "real_time": 2.6177893945897096e-01, + "cpu_time": 2.6177052639350257e-01, "time_unit": "ms" }, { @@ -325,9 +325,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2808, - "real_time": 2.4893482221009214e-01, - "cpu_time": 2.4892937749287819e-01, + "iterations": 2658, + "real_time": 2.6277054109637948e-01, + "cpu_time": 2.6275918924003028e-01, "time_unit": "ms" }, { @@ -339,9 +339,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5102, - 
"real_time": 1.3665569254705739e-01, - "cpu_time": 1.3665094943159542e-01, + "iterations": 4890, + "real_time": 1.4260715456462345e-01, + "cpu_time": 1.4260182985685071e-01, "time_unit": "ms" } ] diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log b/test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log index c4b29991..e75240ac 100644 --- a/test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log +++ b/test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log @@ -1,4 +1,4 @@ -2025-06-01T10:06:40+00:00 +2025-09-07T13:09:57+00:00 Running ./bin/image-processing-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,33 +6,33 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.08, 1.24, 2.03 +Load Average: 2.95, 3.81, 5.60 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
-------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------------------- -Eigen_Convolve2D/1 4.91 ms 4.91 ms 143 -MLIR_Conv2D/1 7.17 ms 7.17 ms 98 -Buddy_Conv2D/1 0.310 ms 0.310 ms 2260 -Buddy_Corr2D_Constant_Padding/1 0.795 ms 0.795 ms 875 -OpenCV_Filter2D_Constant_Padding/1 1.25 ms 1.25 ms 560 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.143 ms 0.143 ms 4871 -Buddy_Resize2D_Bilinear_Interpolation/1 0.261 ms 0.261 ms 2651 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 104620 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 49783 -Buddy_Erosion2D_Constant_Padding/1 0.218 ms 0.218 ms 3101 -Buddy_Dilation2D_Constant_Padding/1 0.214 ms 0.214 ms 3270 -Buddy_Opening2D_Constant_Padding/1 0.319 ms 0.319 ms 2180 -Buddy_Closing2D_Constant_Padding/1 0.312 ms 0.312 ms 2262 -Buddy_TopHat2D_Constant_Padding/1 0.814 ms 0.814 ms 841 -Buddy_BottomHat2D_Constant_Padding/1 0.820 ms 0.820 ms 849 -OpenCV_Erode2D_Constant_Padding/1 0.135 ms 0.135 ms 5157 -OpenCV_Opening2D_Constant_Padding/1 0.219 ms 0.219 ms 3187 -OpenCV_Closing2D_Constant_Padding/1 0.218 ms 0.218 ms 3207 -OpenCV_TopHat2D_Constant_Padding/1 0.255 ms 0.255 ms 2745 -OpenCV_BottomHat2D_Constant_Padding/1 0.253 ms 0.253 ms 2766 -OpenCV_MorphGrad2D_Constant_Padding/1 0.249 ms 0.249 ms 2808 -OpenCV_Dilate2D_Constant_Padding/1 0.137 ms 0.137 ms 5102 +Eigen_Convolve2D/1 5.14 ms 5.14 ms 134 +MLIR_Conv2D/1 7.54 ms 7.54 ms 91 +Buddy_Conv2D/1 0.322 ms 0.322 ms 2173 +Buddy_Corr2D_Constant_Padding/1 0.843 ms 0.843 ms 829 +OpenCV_Filter2D_Constant_Padding/1 1.31 ms 1.31 ms 532 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.151 ms 0.151 ms 4585 +Buddy_Resize2D_Bilinear_Interpolation/1 0.274 ms 0.274 ms 2573 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 101803 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 48122 
+Buddy_Erosion2D_Constant_Padding/1 0.245 ms 0.245 ms 2888 +Buddy_Dilation2D_Constant_Padding/1 0.241 ms 0.241 ms 2928 +Buddy_Opening2D_Constant_Padding/1 0.382 ms 0.382 ms 1789 +Buddy_Closing2D_Constant_Padding/1 0.392 ms 0.392 ms 1769 +Buddy_TopHat2D_Constant_Padding/1 1.01 ms 1.01 ms 664 +Buddy_BottomHat2D_Constant_Padding/1 0.989 ms 0.989 ms 673 +OpenCV_Erode2D_Constant_Padding/1 0.142 ms 0.142 ms 4863 +OpenCV_Opening2D_Constant_Padding/1 0.245 ms 0.245 ms 3100 +OpenCV_Closing2D_Constant_Padding/1 0.229 ms 0.229 ms 3060 +OpenCV_TopHat2D_Constant_Padding/1 0.260 ms 0.260 ms 2675 +OpenCV_BottomHat2D_Constant_Padding/1 0.262 ms 0.262 ms 2709 +OpenCV_MorphGrad2D_Constant_Padding/1 0.263 ms 0.263 ms 2658 +OpenCV_Dilate2D_Constant_Padding/1 0.143 ms 0.143 ms 4890 Saved PNG file. Saved PNG file. Saved PNG file. diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json index b9088742..38474c73 100644 --- a/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json +++ b/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-06-01T10:07:04+00:00", + "date": "2025-09-07T13:10:21+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.0542,1.21924,2.00439], + "load_avg": [2.95947,3.74316,5.5293], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 60, - "real_time": 1.1551547423005104e+01, - "cpu_time": 1.1550767250000002e+01, + "iterations": 58, + "real_time": 1.2066526531145490e+01, + "cpu_time": 1.2065775413793103e+01, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, 
"repetition_index": 0, "threads": 1, - "iterations": 24, - "real_time": 2.9155427357181907e+01, - "cpu_time": 2.9154965874999991e+01, + "iterations": 23, + "real_time": 3.0403987545034159e+01, + "cpu_time": 3.0402858130434790e+01, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 536, - "real_time": 1.3085197372389819e+00, - "cpu_time": 1.3084801436567159e+00, + "iterations": 532, + "real_time": 1.3182804870762324e+00, + "cpu_time": 1.3182079266917295e+00, "time_unit": "ms" }, { @@ -87,9 +87,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 300, - "real_time": 2.3306774348020554e+00, - "cpu_time": 2.3306083266666664e+00, + "iterations": 287, + "real_time": 2.4341225805805951e+00, + "cpu_time": 2.4340057770034864e+00, "time_unit": "ms" }, { @@ -101,9 +101,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 170, - "real_time": 4.1107838863835617e+00, - "cpu_time": 4.1106729823529387e+00, + "iterations": 163, + "real_time": 4.3055439379317626e+00, + "cpu_time": 4.3053028404907954e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4854, - "real_time": 1.4268877356937504e-01, - "cpu_time": 1.4268450350226619e-01, + "iterations": 4634, + "real_time": 1.5098747890161243e-01, + "cpu_time": 1.5098142684505816e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2689, - "real_time": 2.6141061780398644e-01, - "cpu_time": 2.6139920379323189e-01, + "iterations": 2565, + "real_time": 2.7393707115980154e-01, + "cpu_time": 2.7392365964912280e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 105080, - "real_time": 6.6532106587232909e-03, - "cpu_time": 6.6528616768176665e-03, + "iterations": 101737, + "real_time": 6.8683102937117163e-03, + "cpu_time": 
6.8680850231479179e-03, "time_unit": "ms" }, { @@ -157,9 +157,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 49721, - "real_time": 1.4036623706550731e-02, - "cpu_time": 1.4035942217574073e-02, + "iterations": 47105, + "real_time": 1.4881931008863394e-02, + "cpu_time": 1.4881426748752757e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3235, - "real_time": 2.1639395004262144e-01, - "cpu_time": 2.1638765935084969e-01, + "iterations": 3021, + "real_time": 2.3487997751526388e-01, + "cpu_time": 2.3487021383647805e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3260, - "real_time": 2.1409400641643928e-01, - "cpu_time": 2.1408867822085853e-01, + "iterations": 2916, + "real_time": 2.3379066731622353e-01, + "cpu_time": 2.3377707304526726e-01, "time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2249, - "real_time": 3.0992835057794171e-01, - "cpu_time": 3.0992358070253456e-01, + "iterations": 1884, + "real_time": 3.7098769691719369e-01, + "cpu_time": 3.7097218789808861e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2139, - "real_time": 3.1172140541061055e-01, - "cpu_time": 3.1170241748480609e-01, + "iterations": 1844, + "real_time": 3.9446013504861493e-01, + "cpu_time": 3.9443875271149736e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 826, - "real_time": 7.8035605570738886e-01, - "cpu_time": 7.8032884140435832e-01, + "iterations": 686, + "real_time": 1.0319062326722521e+00, + "cpu_time": 1.0318697463556861e+00, "time_unit": "ms" }, { @@ -241,9 +241,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 830, - "real_time": 7.8195727104882162e-01, - "cpu_time": 7.8193199518072320e-01, + 
"iterations": 644, + "real_time": 1.0185169970026668e+00, + "cpu_time": 1.0184906350931668e+00, "time_unit": "ms" }, { @@ -255,9 +255,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5049, - "real_time": 1.3847981230424972e-01, - "cpu_time": 1.3847714319667237e-01, + "iterations": 4867, + "real_time": 1.4380763933745305e-01, + "cpu_time": 1.4380276741319087e-01, "time_unit": "ms" }, { @@ -269,9 +269,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3095, - "real_time": 2.2573410586028184e-01, - "cpu_time": 2.2572844491114658e-01, + "iterations": 3143, + "real_time": 2.2292424454417459e-01, + "cpu_time": 2.2291499936366541e-01, "time_unit": "ms" }, { @@ -283,9 +283,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3109, - "real_time": 2.2543651156963815e-01, - "cpu_time": 2.2543004728208380e-01, + "iterations": 3113, + "real_time": 2.2401934842032478e-01, + "cpu_time": 2.2401028268551296e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2690, - "real_time": 2.6003630437150765e-01, - "cpu_time": 2.6002171301115301e-01, + "iterations": 2714, + "real_time": 2.5358747803470888e-01, + "cpu_time": 2.5357193478260892e-01, "time_unit": "ms" }, { @@ -311,9 +311,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2688, - "real_time": 2.6027442418992341e-01, - "cpu_time": 2.6026942299107153e-01, + "iterations": 2759, + "real_time": 2.5393757948887524e-01, + "cpu_time": 2.5392501087350461e-01, "time_unit": "ms" }, { @@ -325,9 +325,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2759, - "real_time": 2.5280932354058594e-01, - "cpu_time": 2.5280226567597014e-01, + "iterations": 2635, + "real_time": 2.6606651733450914e-01, + "cpu_time": 2.6604915483870911e-01, "time_unit": "ms" }, { @@ -339,9 +339,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5116, - 
"real_time": 1.3649669214442336e-01, - "cpu_time": 1.3649272341673235e-01, + "iterations": 4941, + "real_time": 1.4199837635123663e-01, + "cpu_time": 1.4199401133373810e-01, "time_unit": "ms" } ] diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log b/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log index 3e54ca71..f82ee0db 100644 --- a/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log +++ b/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log @@ -1,4 +1,4 @@ -2025-06-01T10:07:04+00:00 +2025-09-07T13:10:21+00:00 Running ./bin/image-processing-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,33 +6,33 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.05, 1.22, 2.00 +Load Average: 2.96, 3.74, 5.53 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
-------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------------------- -Eigen_Convolve2D/1 11.6 ms 11.6 ms 60 -MLIR_Conv2D/1 29.2 ms 29.2 ms 24 -Buddy_Conv2D/1 1.31 ms 1.31 ms 536 -Buddy_Corr2D_Constant_Padding/1 2.33 ms 2.33 ms 300 -OpenCV_Filter2D_Constant_Padding/1 4.11 ms 4.11 ms 170 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.143 ms 0.143 ms 4854 -Buddy_Resize2D_Bilinear_Interpolation/1 0.261 ms 0.261 ms 2689 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 105080 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 49721 -Buddy_Erosion2D_Constant_Padding/1 0.216 ms 0.216 ms 3235 -Buddy_Dilation2D_Constant_Padding/1 0.214 ms 0.214 ms 3260 -Buddy_Opening2D_Constant_Padding/1 0.310 ms 0.310 ms 2249 -Buddy_Closing2D_Constant_Padding/1 0.312 ms 0.312 ms 2139 -Buddy_TopHat2D_Constant_Padding/1 0.780 ms 0.780 ms 826 -Buddy_BottomHat2D_Constant_Padding/1 0.782 ms 0.782 ms 830 -OpenCV_Erode2D_Constant_Padding/1 0.138 ms 0.138 ms 5049 -OpenCV_Opening2D_Constant_Padding/1 0.226 ms 0.226 ms 3095 -OpenCV_Closing2D_Constant_Padding/1 0.225 ms 0.225 ms 3109 -OpenCV_TopHat2D_Constant_Padding/1 0.260 ms 0.260 ms 2690 -OpenCV_BottomHat2D_Constant_Padding/1 0.260 ms 0.260 ms 2688 -OpenCV_MorphGrad2D_Constant_Padding/1 0.253 ms 0.253 ms 2759 -OpenCV_Dilate2D_Constant_Padding/1 0.136 ms 0.136 ms 5116 +Eigen_Convolve2D/1 12.1 ms 12.1 ms 58 +MLIR_Conv2D/1 30.4 ms 30.4 ms 23 +Buddy_Conv2D/1 1.32 ms 1.32 ms 532 +Buddy_Corr2D_Constant_Padding/1 2.43 ms 2.43 ms 287 +OpenCV_Filter2D_Constant_Padding/1 4.31 ms 4.31 ms 163 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.151 ms 0.151 ms 4634 +Buddy_Resize2D_Bilinear_Interpolation/1 0.274 ms 0.274 ms 2565 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 101737 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47105 
+Buddy_Erosion2D_Constant_Padding/1 0.235 ms 0.235 ms 3021 +Buddy_Dilation2D_Constant_Padding/1 0.234 ms 0.234 ms 2916 +Buddy_Opening2D_Constant_Padding/1 0.371 ms 0.371 ms 1884 +Buddy_Closing2D_Constant_Padding/1 0.394 ms 0.394 ms 1844 +Buddy_TopHat2D_Constant_Padding/1 1.03 ms 1.03 ms 686 +Buddy_BottomHat2D_Constant_Padding/1 1.02 ms 1.02 ms 644 +OpenCV_Erode2D_Constant_Padding/1 0.144 ms 0.144 ms 4867 +OpenCV_Opening2D_Constant_Padding/1 0.223 ms 0.223 ms 3143 +OpenCV_Closing2D_Constant_Padding/1 0.224 ms 0.224 ms 3113 +OpenCV_TopHat2D_Constant_Padding/1 0.254 ms 0.254 ms 2714 +OpenCV_BottomHat2D_Constant_Padding/1 0.254 ms 0.254 ms 2759 +OpenCV_MorphGrad2D_Constant_Padding/1 0.266 ms 0.266 ms 2635 +OpenCV_Dilate2D_Constant_Padding/1 0.142 ms 0.142 ms 4941 Saved PNG file. Saved PNG file. Saved PNG file. diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json index 9063bd22..4515ca1c 100644 --- a/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json +++ b/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-06-01T10:07:28+00:00", + "date": "2025-09-07T13:10:45+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.03467,1.2002,1.97705], + "load_avg": [2.63086,3.60156,5.43457], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 60, - "real_time": 1.1487875661502281e+01, - "cpu_time": 1.1487358733333334e+01, + "iterations": 58, + "real_time": 1.2157393234043285e+01, + "cpu_time": 1.2156209275862070e+01, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, 
"repetition_index": 0, "threads": 1, - "iterations": 24, - "real_time": 2.9097553264970582e+01, - "cpu_time": 2.9096759458333334e+01, + "iterations": 23, + "real_time": 3.0382284651631895e+01, + "cpu_time": 3.0381463956521745e+01, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 508, - "real_time": 1.3769478097147361e+00, - "cpu_time": 1.3769043287401574e+00, + "iterations": 566, + "real_time": 1.2311481281641095e+00, + "cpu_time": 1.2311096890459359e+00, "time_unit": "ms" }, { @@ -87,9 +87,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 301, - "real_time": 2.3193448484934049e+00, - "cpu_time": 2.3192444285714293e+00, + "iterations": 287, + "real_time": 2.4388498052279708e+00, + "cpu_time": 2.4387647142857145e+00, "time_unit": "ms" }, { @@ -101,9 +101,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 170, - "real_time": 4.1048574535285729e+00, - "cpu_time": 4.1047252352941150e+00, + "iterations": 163, + "real_time": 4.2787142600749899e+00, + "cpu_time": 4.2785227607361991e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4857, - "real_time": 1.4277608228768002e-01, - "cpu_time": 1.4277288326127241e-01, + "iterations": 4617, + "real_time": 1.4999322794928086e-01, + "cpu_time": 1.4998539831059118e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2686, - "real_time": 2.6132201128212801e-01, - "cpu_time": 2.6131442181682785e-01, + "iterations": 2561, + "real_time": 2.7250940011423669e-01, + "cpu_time": 2.7250318976962118e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 105064, - "real_time": 6.6562829248287755e-03, - "cpu_time": 6.6561413709738835e-03, + "iterations": 101810, + "real_time": 6.8337629754936027e-03, + "cpu_time": 
6.8336145270601984e-03, "time_unit": "ms" }, { @@ -157,9 +157,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 49925, - "real_time": 1.4008483739871052e-02, - "cpu_time": 1.4008090896344512e-02, + "iterations": 48577, + "real_time": 1.4544123902229474e-02, + "cpu_time": 1.4543825637647435e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3267, - "real_time": 2.1407247116469375e-01, - "cpu_time": 2.1406563881236618e-01, + "iterations": 2990, + "real_time": 2.3945177154596833e-01, + "cpu_time": 2.3944178795986570e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3269, - "real_time": 2.1407944222917322e-01, - "cpu_time": 2.1407154512083224e-01, + "iterations": 2958, + "real_time": 2.3566706432249998e-01, + "cpu_time": 2.3565873157538894e-01, "time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2235, - "real_time": 3.1582788383000648e-01, - "cpu_time": 3.1582353601789687e-01, + "iterations": 1884, + "real_time": 3.8850708664918909e-01, + "cpu_time": 3.8848084235668817e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2209, - "real_time": 3.1493820297253949e-01, - "cpu_time": 3.1491848709823395e-01, + "iterations": 1811, + "real_time": 3.7958499091491721e-01, + "cpu_time": 3.7957069243511937e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 841, - "real_time": 8.0088509399528029e-01, - "cpu_time": 8.0083224970273426e-01, + "iterations": 725, + "real_time": 1.0013685658060272e+00, + "cpu_time": 1.0012686234482775e+00, "time_unit": "ms" }, { @@ -241,9 +241,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 852, - "real_time": 7.8501773368514760e-01, - "cpu_time": 7.8499030399061098e-01, + 
"iterations": 686, + "real_time": 9.9782866721771901e-01, + "cpu_time": 9.9774830320699592e-01, "time_unit": "ms" }, { @@ -255,9 +255,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5129, - "real_time": 1.3646853262975123e-01, - "cpu_time": 1.3646564866445693e-01, + "iterations": 4854, + "real_time": 1.4177612733168027e-01, + "cpu_time": 1.4176570457354778e-01, "time_unit": "ms" }, { @@ -269,9 +269,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3105, - "real_time": 2.2545198155582047e-01, - "cpu_time": 2.2544327149758506e-01, + "iterations": 3120, + "real_time": 2.2748470760117739e-01, + "cpu_time": 2.2747433525640967e-01, "time_unit": "ms" }, { @@ -283,9 +283,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3082, - "real_time": 2.2723694984051099e-01, - "cpu_time": 2.2723045360155805e-01, + "iterations": 3017, + "real_time": 2.2835247798949074e-01, + "cpu_time": 2.2833371925754087e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2679, - "real_time": 2.6105903089046478e-01, - "cpu_time": 2.6105058268010423e-01, + "iterations": 2621, + "real_time": 2.6303982573217766e-01, + "cpu_time": 2.6302844982830992e-01, "time_unit": "ms" }, { @@ -311,9 +311,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2672, - "real_time": 2.6178982354209807e-01, - "cpu_time": 2.6178556474550924e-01, + "iterations": 2639, + "real_time": 2.6201856276355667e-01, + "cpu_time": 2.6201338423645221e-01, "time_unit": "ms" }, { @@ -325,9 +325,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2751, - "real_time": 2.5383601799981026e-01, - "cpu_time": 2.5382809596510303e-01, + "iterations": 2588, + "real_time": 2.6833365197758402e-01, + "cpu_time": 2.6831637287480692e-01, "time_unit": "ms" }, { @@ -339,9 +339,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5094, - 
"real_time": 1.3636100326431091e-01, - "cpu_time": 1.3635804652532396e-01, + "iterations": 4864, + "real_time": 1.4279707791406268e-01, + "cpu_time": 1.4278683675986900e-01, "time_unit": "ms" } ] diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log b/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log index 0ea453df..89be8078 100644 --- a/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log +++ b/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log @@ -1,4 +1,4 @@ -2025-06-01T10:07:28+00:00 +2025-09-07T13:10:45+00:00 Running ./bin/image-processing-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,33 +6,33 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.03, 1.20, 1.98 +Load Average: 2.63, 3.60, 5.43 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
-------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------------------- -Eigen_Convolve2D/1 11.5 ms 11.5 ms 60 -MLIR_Conv2D/1 29.1 ms 29.1 ms 24 -Buddy_Conv2D/1 1.38 ms 1.38 ms 508 -Buddy_Corr2D_Constant_Padding/1 2.32 ms 2.32 ms 301 -OpenCV_Filter2D_Constant_Padding/1 4.10 ms 4.10 ms 170 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.143 ms 0.143 ms 4857 -Buddy_Resize2D_Bilinear_Interpolation/1 0.261 ms 0.261 ms 2686 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 105064 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 49925 -Buddy_Erosion2D_Constant_Padding/1 0.214 ms 0.214 ms 3267 -Buddy_Dilation2D_Constant_Padding/1 0.214 ms 0.214 ms 3269 -Buddy_Opening2D_Constant_Padding/1 0.316 ms 0.316 ms 2235 -Buddy_Closing2D_Constant_Padding/1 0.315 ms 0.315 ms 2209 -Buddy_TopHat2D_Constant_Padding/1 0.801 ms 0.801 ms 841 -Buddy_BottomHat2D_Constant_Padding/1 0.785 ms 0.785 ms 852 -OpenCV_Erode2D_Constant_Padding/1 0.136 ms 0.136 ms 5129 -OpenCV_Opening2D_Constant_Padding/1 0.225 ms 0.225 ms 3105 -OpenCV_Closing2D_Constant_Padding/1 0.227 ms 0.227 ms 3082 -OpenCV_TopHat2D_Constant_Padding/1 0.261 ms 0.261 ms 2679 -OpenCV_BottomHat2D_Constant_Padding/1 0.262 ms 0.262 ms 2672 -OpenCV_MorphGrad2D_Constant_Padding/1 0.254 ms 0.254 ms 2751 -OpenCV_Dilate2D_Constant_Padding/1 0.136 ms 0.136 ms 5094 +Eigen_Convolve2D/1 12.2 ms 12.2 ms 58 +MLIR_Conv2D/1 30.4 ms 30.4 ms 23 +Buddy_Conv2D/1 1.23 ms 1.23 ms 566 +Buddy_Corr2D_Constant_Padding/1 2.44 ms 2.44 ms 287 +OpenCV_Filter2D_Constant_Padding/1 4.28 ms 4.28 ms 163 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.150 ms 0.150 ms 4617 +Buddy_Resize2D_Bilinear_Interpolation/1 0.273 ms 0.273 ms 2561 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 101810 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 48577 
+Buddy_Erosion2D_Constant_Padding/1 0.239 ms 0.239 ms 2990 +Buddy_Dilation2D_Constant_Padding/1 0.236 ms 0.236 ms 2958 +Buddy_Opening2D_Constant_Padding/1 0.389 ms 0.388 ms 1884 +Buddy_Closing2D_Constant_Padding/1 0.380 ms 0.380 ms 1811 +Buddy_TopHat2D_Constant_Padding/1 1.00 ms 1.00 ms 725 +Buddy_BottomHat2D_Constant_Padding/1 0.998 ms 0.998 ms 686 +OpenCV_Erode2D_Constant_Padding/1 0.142 ms 0.142 ms 4854 +OpenCV_Opening2D_Constant_Padding/1 0.227 ms 0.227 ms 3120 +OpenCV_Closing2D_Constant_Padding/1 0.228 ms 0.228 ms 3017 +OpenCV_TopHat2D_Constant_Padding/1 0.263 ms 0.263 ms 2621 +OpenCV_BottomHat2D_Constant_Padding/1 0.262 ms 0.262 ms 2639 +OpenCV_MorphGrad2D_Constant_Padding/1 0.268 ms 0.268 ms 2588 +OpenCV_Dilate2D_Constant_Padding/1 0.143 ms 0.143 ms 4864 Saved PNG file. Saved PNG file. Saved PNG file. diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json index 2ed991ba..ab377824 100644 --- a/test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json +++ b/test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-06-01T10:07:52+00:00", + "date": "2025-09-07T13:11:10+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.02393,1.18652,1.95557], + "load_avg": [2.49512,3.48877,5.34766], "library_build_type": "release" }, "benchmarks": [ @@ -46,8 +46,8 @@ "repetition_index": 0, "threads": 1, "iterations": 31, - "real_time": 2.1483107620189266e+01, - "cpu_time": 2.1482540354838711e+01, + "real_time": 2.2698959636111415e+01, + "cpu_time": 2.2698047322580649e+01, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - 
"iterations": 11, - "real_time": 6.6662094640460879e+01, - "cpu_time": 6.6661596272727294e+01, + "iterations": 10, + "real_time": 6.9128799811005592e+01, + "cpu_time": 6.9123686800000002e+01, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 312, - "real_time": 2.2404276921103397e+00, - "cpu_time": 2.2403709903846161e+00, + "iterations": 303, + "real_time": 2.3153245744138662e+00, + "cpu_time": 2.3151734257425751e+00, "time_unit": "ms" }, { @@ -87,9 +87,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 150, - "real_time": 4.6722977980971336e+00, - "cpu_time": 4.6722623466666642e+00, + "iterations": 145, + "real_time": 4.8356864472915388e+00, + "cpu_time": 4.8354401310344803e+00, "time_unit": "ms" }, { @@ -101,9 +101,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 81, - "real_time": 8.6062704615386920e+00, - "cpu_time": 8.6059275802469042e+00, + "iterations": 78, + "real_time": 9.0294344207415218e+00, + "cpu_time": 9.0290102307692237e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4847, - "real_time": 1.4253131920759618e-01, - "cpu_time": 1.4252792098205075e-01, + "iterations": 4647, + "real_time": 1.4970531277895641e-01, + "cpu_time": 1.4969578330105443e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2686, - "real_time": 2.6136482784930931e-01, - "cpu_time": 2.6135366939687277e-01, + "iterations": 2568, + "real_time": 2.7367153358747281e-01, + "cpu_time": 2.7365799805295959e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 105200, - "real_time": 6.6543605086524680e-03, - "cpu_time": 6.6542239543726190e-03, + "iterations": 101049, + "real_time": 6.8725437557921798e-03, + "cpu_time": 6.8721282645053334e-03, "time_unit": "ms" }, { @@ -157,9 +157,9 
@@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 49717, - "real_time": 1.4051802086674765e-02, - "cpu_time": 1.4051148580968290e-02, + "iterations": 48490, + "real_time": 1.6256974490309873e-02, + "cpu_time": 1.6256173540936274e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3275, - "real_time": 2.1293145497791641e-01, - "cpu_time": 2.1291816183206122e-01, + "iterations": 2701, + "real_time": 2.5604368194038452e-01, + "cpu_time": 2.5602443983709744e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3279, - "real_time": 2.1357018615313150e-01, - "cpu_time": 2.1355237785910328e-01, + "iterations": 2749, + "real_time": 2.5215670845863297e-01, + "cpu_time": 2.5215243179337943e-01, "time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2249, - "real_time": 3.0966895674270434e-01, - "cpu_time": 3.0966282303245918e-01, + "iterations": 1779, + "real_time": 4.4195852156031190e-01, + "cpu_time": 4.4191965486228230e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2267, - "real_time": 3.1410488470514597e-01, - "cpu_time": 3.1408243140714548e-01, + "iterations": 1747, + "real_time": 4.3807942345610468e-01, + "cpu_time": 4.3805371150543754e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 827, - "real_time": 7.8919717755974994e-01, - "cpu_time": 7.8917250060459587e-01, + "iterations": 686, + "real_time": 1.0212933530612869e+00, + "cpu_time": 1.0212425087463568e+00, "time_unit": "ms" }, { @@ -241,9 +241,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 845, - "real_time": 7.6306458099706642e-01, - "cpu_time": 7.6302103786982345e-01, + "iterations": 678, + "real_time": 1.0082492423532283e+00, + 
"cpu_time": 1.0082195176991142e+00, "time_unit": "ms" }, { @@ -255,9 +255,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5188, - "real_time": 1.3468034979050769e-01, - "cpu_time": 1.3467689070932889e-01, + "iterations": 4907, + "real_time": 1.4108774486252751e-01, + "cpu_time": 1.4108310148767086e-01, "time_unit": "ms" }, { @@ -269,9 +269,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3054, - "real_time": 2.2879175807653726e-01, - "cpu_time": 2.2878261100196512e-01, + "iterations": 3151, + "real_time": 2.2556108071742759e-01, + "cpu_time": 2.2554901872421484e-01, "time_unit": "ms" }, { @@ -283,9 +283,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3052, - "real_time": 2.2905985098503051e-01, - "cpu_time": 2.2905445674967184e-01, + "iterations": 3125, + "real_time": 2.2625566363334657e-01, + "cpu_time": 2.2624866016000056e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2667, - "real_time": 2.6218339879055974e-01, - "cpu_time": 2.6217200074990588e-01, + "iterations": 2563, + "real_time": 2.6822397270985893e-01, + "cpu_time": 2.6821861724541574e-01, "time_unit": "ms" }, { @@ -311,9 +311,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2674, - "real_time": 2.6174741828495118e-01, - "cpu_time": 2.6173521727748611e-01, + "iterations": 2586, + "real_time": 2.6990216274364737e-01, + "cpu_time": 2.6988920069605538e-01, "time_unit": "ms" }, { @@ -325,9 +325,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2759, - "real_time": 2.5373818531724240e-01, - "cpu_time": 2.5372534541500574e-01, + "iterations": 2636, + "real_time": 2.6586177378712006e-01, + "cpu_time": 2.6585147420333838e-01, "time_unit": "ms" }, { @@ -339,9 +339,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5114, - "real_time": 1.3672967110299766e-01, - "cpu_time": 
1.3672548572545917e-01, + "iterations": 4852, + "real_time": 1.4311890367354033e-01, + "cpu_time": 1.4311650906842524e-01, "time_unit": "ms" } ] diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log b/test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log index eb8ab690..70ceea58 100644 --- a/test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log +++ b/test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log @@ -1,4 +1,4 @@ -2025-06-01T10:07:52+00:00 +2025-09-07T13:11:10+00:00 Running ./bin/image-processing-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,33 +6,33 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.02, 1.19, 1.96 +Load Average: 2.50, 3.49, 5.35 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
-------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------------------- -Eigen_Convolve2D/1 21.5 ms 21.5 ms 31 -MLIR_Conv2D/1 66.7 ms 66.7 ms 11 -Buddy_Conv2D/1 2.24 ms 2.24 ms 312 -Buddy_Corr2D_Constant_Padding/1 4.67 ms 4.67 ms 150 -OpenCV_Filter2D_Constant_Padding/1 8.61 ms 8.61 ms 81 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.143 ms 0.143 ms 4847 -Buddy_Resize2D_Bilinear_Interpolation/1 0.261 ms 0.261 ms 2686 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 105200 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 49717 -Buddy_Erosion2D_Constant_Padding/1 0.213 ms 0.213 ms 3275 -Buddy_Dilation2D_Constant_Padding/1 0.214 ms 0.214 ms 3279 -Buddy_Opening2D_Constant_Padding/1 0.310 ms 0.310 ms 2249 -Buddy_Closing2D_Constant_Padding/1 0.314 ms 0.314 ms 2267 -Buddy_TopHat2D_Constant_Padding/1 0.789 ms 0.789 ms 827 -Buddy_BottomHat2D_Constant_Padding/1 0.763 ms 0.763 ms 845 -OpenCV_Erode2D_Constant_Padding/1 0.135 ms 0.135 ms 5188 -OpenCV_Opening2D_Constant_Padding/1 0.229 ms 0.229 ms 3054 -OpenCV_Closing2D_Constant_Padding/1 0.229 ms 0.229 ms 3052 -OpenCV_TopHat2D_Constant_Padding/1 0.262 ms 0.262 ms 2667 -OpenCV_BottomHat2D_Constant_Padding/1 0.262 ms 0.262 ms 2674 -OpenCV_MorphGrad2D_Constant_Padding/1 0.254 ms 0.254 ms 2759 -OpenCV_Dilate2D_Constant_Padding/1 0.137 ms 0.137 ms 5114 +Eigen_Convolve2D/1 22.7 ms 22.7 ms 31 +MLIR_Conv2D/1 69.1 ms 69.1 ms 10 +Buddy_Conv2D/1 2.32 ms 2.32 ms 303 +Buddy_Corr2D_Constant_Padding/1 4.84 ms 4.84 ms 145 +OpenCV_Filter2D_Constant_Padding/1 9.03 ms 9.03 ms 78 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.150 ms 0.150 ms 4647 +Buddy_Resize2D_Bilinear_Interpolation/1 0.274 ms 0.274 ms 2568 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 101049 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.016 ms 0.016 ms 48490 
+Buddy_Erosion2D_Constant_Padding/1 0.256 ms 0.256 ms 2701 +Buddy_Dilation2D_Constant_Padding/1 0.252 ms 0.252 ms 2749 +Buddy_Opening2D_Constant_Padding/1 0.442 ms 0.442 ms 1779 +Buddy_Closing2D_Constant_Padding/1 0.438 ms 0.438 ms 1747 +Buddy_TopHat2D_Constant_Padding/1 1.02 ms 1.02 ms 686 +Buddy_BottomHat2D_Constant_Padding/1 1.01 ms 1.01 ms 678 +OpenCV_Erode2D_Constant_Padding/1 0.141 ms 0.141 ms 4907 +OpenCV_Opening2D_Constant_Padding/1 0.226 ms 0.226 ms 3151 +OpenCV_Closing2D_Constant_Padding/1 0.226 ms 0.226 ms 3125 +OpenCV_TopHat2D_Constant_Padding/1 0.268 ms 0.268 ms 2563 +OpenCV_BottomHat2D_Constant_Padding/1 0.270 ms 0.270 ms 2586 +OpenCV_MorphGrad2D_Constant_Padding/1 0.266 ms 0.266 ms 2636 +OpenCV_Dilate2D_Constant_Padding/1 0.143 ms 0.143 ms 4852 Saved PNG file. Saved PNG file. Saved PNG file. diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json index 6b3c7804..71ca4698 100644 --- a/test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json +++ b/test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-06-01T10:08:16+00:00", + "date": "2025-09-07T13:11:35+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.01514,1.17041,1.92871], + "load_avg": [2.59424,3.43213,5.27881], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 32, - "real_time": 2.1583709167316556e+01, - "cpu_time": 2.1582785937499999e+01, + "iterations": 31, + "real_time": 2.2505127254032320e+01, + "cpu_time": 2.2503875387096770e+01, "time_unit": "ms" }, { @@ -60,8 +60,8 @@ 
"repetition_index": 0, "threads": 1, "iterations": 10, - "real_time": 6.6664919815957546e+01, - "cpu_time": 6.6663032099999995e+01, + "real_time": 6.9353722408413887e+01, + "cpu_time": 6.9350002200000006e+01, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 299, - "real_time": 2.3427403229833845e+00, - "cpu_time": 2.3426287826086951e+00, + "iterations": 294, + "real_time": 2.3809186860817628e+00, + "cpu_time": 2.3807380238095233e+00, "time_unit": "ms" }, { @@ -87,9 +87,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 150, - "real_time": 4.6730378891030950e+00, - "cpu_time": 4.6729712266666672e+00, + "iterations": 145, + "real_time": 4.8076913788400848e+00, + "cpu_time": 4.8074658482758617e+00, "time_unit": "ms" }, { @@ -101,9 +101,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 81, - "real_time": 8.5960494837275263e+00, - "cpu_time": 8.5955845432098688e+00, + "iterations": 77, + "real_time": 9.0552678742966091e+00, + "cpu_time": 9.0549782467532474e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4853, - "real_time": 1.4254460892320145e-01, - "cpu_time": 1.4253487471667009e-01, + "iterations": 4641, + "real_time": 1.5078150530591963e-01, + "cpu_time": 1.5077307412195642e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2693, - "real_time": 2.6023447322681675e-01, - "cpu_time": 2.6022185480876342e-01, + "iterations": 2561, + "real_time": 2.7482259155995487e-01, + "cpu_time": 2.7480107887543898e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 105142, - "real_time": 6.6607927815345129e-03, - "cpu_time": 6.6606426451846111e-03, + "iterations": 102307, + "real_time": 6.8564598795962314e-03, + "cpu_time": 6.8562642438933811e-03, "time_unit": "ms" }, { 
@@ -157,9 +157,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 50003, - "real_time": 1.4013960396383099e-02, - "cpu_time": 1.4013452032878017e-02, + "iterations": 48525, + "real_time": 1.4494471271157203e-02, + "cpu_time": 1.4493903987635266e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3229, - "real_time": 2.1446620847611059e-01, - "cpu_time": 2.1445776308454609e-01, + "iterations": 2896, + "real_time": 2.4304971181226698e-01, + "cpu_time": 2.4304034495856303e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3262, - "real_time": 2.1735038653993666e-01, - "cpu_time": 2.1733955395462870e-01, + "iterations": 2854, + "real_time": 2.4745649362495920e-01, + "cpu_time": 2.4743058479327271e-01, "time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2262, - "real_time": 3.0764156028588824e-01, - "cpu_time": 3.0763562245800136e-01, + "iterations": 1734, + "real_time": 4.0102887129563636e-01, + "cpu_time": 4.0101697808535247e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2236, - "real_time": 3.0961510690332311e-01, - "cpu_time": 3.0960087119856816e-01, + "iterations": 1783, + "real_time": 4.0142091886420900e-01, + "cpu_time": 4.0141442512619180e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 855, - "real_time": 7.7676248480702004e-01, - "cpu_time": 7.7674190058479520e-01, + "iterations": 652, + "real_time": 1.0057697529715994e+00, + "cpu_time": 1.0057539616564428e+00, "time_unit": "ms" }, { @@ -241,9 +241,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 826, - "real_time": 7.9636755636182877e-01, - "cpu_time": 7.9630684140435715e-01, + "iterations": 668, + "real_time": 
1.0270904693864062e+00, + "cpu_time": 1.0270344431137737e+00, "time_unit": "ms" }, { @@ -255,9 +255,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5125, - "real_time": 1.3618563297318248e-01, - "cpu_time": 1.3618100897560950e-01, + "iterations": 4769, + "real_time": 1.5437182739711353e-01, + "cpu_time": 1.5436570203396952e-01, "time_unit": "ms" }, { @@ -269,9 +269,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3079, - "real_time": 2.2738466971529966e-01, - "cpu_time": 2.2736882591750601e-01, + "iterations": 2758, + "real_time": 2.2677743187453800e-01, + "cpu_time": 2.2676782378535207e-01, "time_unit": "ms" }, { @@ -283,9 +283,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3097, - "real_time": 2.2597094918975608e-01, - "cpu_time": 2.2596501485308346e-01, + "iterations": 3085, + "real_time": 2.2627895447384788e-01, + "cpu_time": 2.2626945510534760e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2680, - "real_time": 2.6094424677317712e-01, - "cpu_time": 2.6093131977611894e-01, + "iterations": 2647, + "real_time": 2.6378509263790550e-01, + "cpu_time": 2.6376938307517955e-01, "time_unit": "ms" }, { @@ -311,9 +311,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2694, - "real_time": 2.5972596120086527e-01, - "cpu_time": 2.5972145916852268e-01, + "iterations": 2665, + "real_time": 2.6252820920094316e-01, + "cpu_time": 2.6251876210131225e-01, "time_unit": "ms" }, { @@ -325,9 +325,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2766, - "real_time": 2.5288170083700018e-01, - "cpu_time": 2.5287643926247261e-01, + "iterations": 2628, + "real_time": 2.6982235956137585e-01, + "cpu_time": 2.6981582077625482e-01, "time_unit": "ms" }, { @@ -339,9 +339,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4993, - "real_time": 1.3998398830005837e-01, - 
"cpu_time": 1.3997966553174421e-01, + "iterations": 4915, + "real_time": 1.4328015390271936e-01, + "cpu_time": 1.4327507243133245e-01, "time_unit": "ms" } ] diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log b/test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log index 0b7b2543..e226cf28 100644 --- a/test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log +++ b/test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log @@ -1,4 +1,4 @@ -2025-06-01T10:08:16+00:00 +2025-09-07T13:11:35+00:00 Running ./bin/image-processing-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,33 +6,33 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.02, 1.17, 1.93 +Load Average: 2.59, 3.43, 5.28 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
-------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------------------- -Eigen_Convolve2D/1 21.6 ms 21.6 ms 32 -MLIR_Conv2D/1 66.7 ms 66.7 ms 10 -Buddy_Conv2D/1 2.34 ms 2.34 ms 299 -Buddy_Corr2D_Constant_Padding/1 4.67 ms 4.67 ms 150 -OpenCV_Filter2D_Constant_Padding/1 8.60 ms 8.60 ms 81 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.143 ms 0.143 ms 4853 -Buddy_Resize2D_Bilinear_Interpolation/1 0.260 ms 0.260 ms 2693 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 105142 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 50003 -Buddy_Erosion2D_Constant_Padding/1 0.214 ms 0.214 ms 3229 -Buddy_Dilation2D_Constant_Padding/1 0.217 ms 0.217 ms 3262 -Buddy_Opening2D_Constant_Padding/1 0.308 ms 0.308 ms 2262 -Buddy_Closing2D_Constant_Padding/1 0.310 ms 0.310 ms 2236 -Buddy_TopHat2D_Constant_Padding/1 0.777 ms 0.777 ms 855 -Buddy_BottomHat2D_Constant_Padding/1 0.796 ms 0.796 ms 826 -OpenCV_Erode2D_Constant_Padding/1 0.136 ms 0.136 ms 5125 -OpenCV_Opening2D_Constant_Padding/1 0.227 ms 0.227 ms 3079 -OpenCV_Closing2D_Constant_Padding/1 0.226 ms 0.226 ms 3097 -OpenCV_TopHat2D_Constant_Padding/1 0.261 ms 0.261 ms 2680 -OpenCV_BottomHat2D_Constant_Padding/1 0.260 ms 0.260 ms 2694 -OpenCV_MorphGrad2D_Constant_Padding/1 0.253 ms 0.253 ms 2766 -OpenCV_Dilate2D_Constant_Padding/1 0.140 ms 0.140 ms 4993 +Eigen_Convolve2D/1 22.5 ms 22.5 ms 31 +MLIR_Conv2D/1 69.4 ms 69.4 ms 10 +Buddy_Conv2D/1 2.38 ms 2.38 ms 294 +Buddy_Corr2D_Constant_Padding/1 4.81 ms 4.81 ms 145 +OpenCV_Filter2D_Constant_Padding/1 9.06 ms 9.05 ms 77 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.151 ms 0.151 ms 4641 +Buddy_Resize2D_Bilinear_Interpolation/1 0.275 ms 0.275 ms 2561 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 102307 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 48525 
+Buddy_Erosion2D_Constant_Padding/1 0.243 ms 0.243 ms 2896 +Buddy_Dilation2D_Constant_Padding/1 0.247 ms 0.247 ms 2854 +Buddy_Opening2D_Constant_Padding/1 0.401 ms 0.401 ms 1734 +Buddy_Closing2D_Constant_Padding/1 0.401 ms 0.401 ms 1783 +Buddy_TopHat2D_Constant_Padding/1 1.01 ms 1.01 ms 652 +Buddy_BottomHat2D_Constant_Padding/1 1.03 ms 1.03 ms 668 +OpenCV_Erode2D_Constant_Padding/1 0.154 ms 0.154 ms 4769 +OpenCV_Opening2D_Constant_Padding/1 0.227 ms 0.227 ms 2758 +OpenCV_Closing2D_Constant_Padding/1 0.226 ms 0.226 ms 3085 +OpenCV_TopHat2D_Constant_Padding/1 0.264 ms 0.264 ms 2647 +OpenCV_BottomHat2D_Constant_Padding/1 0.263 ms 0.263 ms 2665 +OpenCV_MorphGrad2D_Constant_Padding/1 0.270 ms 0.270 ms 2628 +OpenCV_Dilate2D_Constant_Padding/1 0.143 ms 0.143 ms 4915 Saved PNG file. Saved PNG file. Saved PNG file. diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json index 524f2941..86e96502 100644 --- a/test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json +++ b/test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-06-01T10:08:40+00:00", + "date": "2025-09-07T13:12:00+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.00879,1.15576,1.90234], + "load_avg": [2.81396,3.41357,5.22266], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 21, - "real_time": 3.4179400918739184e+01, - "cpu_time": 3.4179150904761904e+01, + "iterations": 19, + "real_time": 3.5926808652124905e+01, + "cpu_time": 3.5924946894736848e+01, "time_unit": "ms" }, { @@ -60,8 +60,8 @@ "repetition_index": 
0, "threads": 1, "iterations": 6, - "real_time": 1.1945824356128772e+02, - "cpu_time": 1.1945671849999998e+02, + "real_time": 1.2407693142692249e+02, + "cpu_time": 1.2407143333333333e+02, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 179, - "real_time": 3.9118556045620134e+00, - "cpu_time": 3.9117745865921782e+00, + "iterations": 174, + "real_time": 4.0391601365188077e+00, + "cpu_time": 4.0390005804597706e+00, "time_unit": "ms" }, { @@ -87,9 +87,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 90, - "real_time": 7.7935500484373836e+00, - "cpu_time": 7.7934364222222205e+00, + "iterations": 86, + "real_time": 8.1013258286686831e+00, + "cpu_time": 8.1008378488372106e+00, "time_unit": "ms" }, { @@ -101,9 +101,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 119, - "real_time": 5.8913991450011229e+00, - "cpu_time": 5.8910765714285711e+00, + "iterations": 115, + "real_time": 5.9959024190902710e+00, + "cpu_time": 5.9957746086956556e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4837, - "real_time": 1.4248181189144105e-01, - "cpu_time": 1.4247768864998964e-01, + "iterations": 4686, + "real_time": 1.4948366170910449e-01, + "cpu_time": 1.4947746414852756e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2692, - "real_time": 2.6031898093834677e-01, - "cpu_time": 2.6030977674591388e-01, + "iterations": 2562, + "real_time": 2.7427126585878869e-01, + "cpu_time": 2.7425979039812648e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 105099, - "real_time": 6.6508404265951922e-03, - "cpu_time": 6.6507078278575452e-03, + "iterations": 101826, + "real_time": 6.8600357443400397e-03, + "cpu_time": 6.8597227525386429e-03, "time_unit": "ms" }, { @@ -157,9 +157,9 @@ 
"repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 49521, - "real_time": 1.4164025459980720e-02, - "cpu_time": 1.4163677369196903e-02, + "iterations": 47952, + "real_time": 1.4625412179234826e-02, + "cpu_time": 1.4625162954621277e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3257, - "real_time": 2.1447988594734929e-01, - "cpu_time": 2.1447408136321755e-01, + "iterations": 2816, + "real_time": 2.4167992656161499e-01, + "cpu_time": 2.4167627130681854e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3222, - "real_time": 2.1550155167880078e-01, - "cpu_time": 2.1549107014276825e-01, + "iterations": 2869, + "real_time": 2.4284223862623411e-01, + "cpu_time": 2.4281936911816007e-01, "time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2228, - "real_time": 3.2929575264694660e-01, - "cpu_time": 3.2929014048473909e-01, + "iterations": 1716, + "real_time": 3.8663555796329790e-01, + "cpu_time": 3.8661013053613069e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2221, - "real_time": 3.1350365177783168e-01, - "cpu_time": 3.1348973390364687e-01, + "iterations": 1845, + "real_time": 3.9366766366209116e-01, + "cpu_time": 3.9364567262872591e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 845, - "real_time": 7.8893084321501694e-01, - "cpu_time": 7.8890399999999949e-01, + "iterations": 681, + "real_time": 1.0337662909086931e+00, + "cpu_time": 1.0336980954478709e+00, "time_unit": "ms" }, { @@ -241,9 +241,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 825, - "real_time": 7.9270076571088843e-01, - "cpu_time": 7.9265848606060751e-01, + "iterations": 743, + "real_time": 9.9821302984636850e-01, + 
"cpu_time": 9.9815262180349729e-01, "time_unit": "ms" }, { @@ -255,9 +255,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5117, - "real_time": 1.3651657002860268e-01, - "cpu_time": 1.3651407563025220e-01, + "iterations": 4848, + "real_time": 1.4389439781039659e-01, + "cpu_time": 1.4388865037128742e-01, "time_unit": "ms" }, { @@ -269,9 +269,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3176, - "real_time": 2.2036766208783354e-01, - "cpu_time": 2.2036350566750670e-01, + "iterations": 3034, + "real_time": 2.3212716666074198e-01, + "cpu_time": 2.3211989123269525e-01, "time_unit": "ms" }, { @@ -283,9 +283,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3179, - "real_time": 2.1992300025657049e-01, - "cpu_time": 2.1991539792387513e-01, + "iterations": 3013, + "real_time": 2.3206682561087205e-01, + "cpu_time": 2.3205983637570513e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2758, - "real_time": 2.5396899977311266e-01, - "cpu_time": 2.5396026613487976e-01, + "iterations": 2605, + "real_time": 2.6449215475062260e-01, + "cpu_time": 2.6447615969289784e-01, "time_unit": "ms" }, { @@ -311,9 +311,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2740, - "real_time": 2.5533607270378267e-01, - "cpu_time": 2.5533049343065611e-01, + "iterations": 2599, + "real_time": 2.6553470344670049e-01, + "cpu_time": 2.6552657983839900e-01, "time_unit": "ms" }, { @@ -325,9 +325,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2779, - "real_time": 2.5107566979065266e-01, - "cpu_time": 2.5107116228859239e-01, + "iterations": 2579, + "real_time": 2.7055674661911472e-01, + "cpu_time": 2.7054348817371016e-01, "time_unit": "ms" }, { @@ -339,9 +339,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5176, - "real_time": 1.3480490214627530e-01, - "cpu_time": 
1.3480058597372524e-01, + "iterations": 4823, + "real_time": 1.4378969109011419e-01, + "cpu_time": 1.4378343479162370e-01, "time_unit": "ms" } ] diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log b/test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log index 7b1051c1..34454b9e 100644 --- a/test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log +++ b/test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log @@ -1,4 +1,4 @@ -2025-06-01T10:08:40+00:00 +2025-09-07T13:12:00+00:00 Running ./bin/image-processing-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,33 +6,33 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.01, 1.16, 1.90 +Load Average: 2.81, 3.41, 5.22 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
-------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------------------- -Eigen_Convolve2D/1 34.2 ms 34.2 ms 21 -MLIR_Conv2D/1 119 ms 119 ms 6 -Buddy_Conv2D/1 3.91 ms 3.91 ms 179 -Buddy_Corr2D_Constant_Padding/1 7.79 ms 7.79 ms 90 -OpenCV_Filter2D_Constant_Padding/1 5.89 ms 5.89 ms 119 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.142 ms 0.142 ms 4837 -Buddy_Resize2D_Bilinear_Interpolation/1 0.260 ms 0.260 ms 2692 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 105099 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 49521 -Buddy_Erosion2D_Constant_Padding/1 0.214 ms 0.214 ms 3257 -Buddy_Dilation2D_Constant_Padding/1 0.216 ms 0.215 ms 3222 -Buddy_Opening2D_Constant_Padding/1 0.329 ms 0.329 ms 2228 -Buddy_Closing2D_Constant_Padding/1 0.314 ms 0.313 ms 2221 -Buddy_TopHat2D_Constant_Padding/1 0.789 ms 0.789 ms 845 -Buddy_BottomHat2D_Constant_Padding/1 0.793 ms 0.793 ms 825 -OpenCV_Erode2D_Constant_Padding/1 0.137 ms 0.137 ms 5117 -OpenCV_Opening2D_Constant_Padding/1 0.220 ms 0.220 ms 3176 -OpenCV_Closing2D_Constant_Padding/1 0.220 ms 0.220 ms 3179 -OpenCV_TopHat2D_Constant_Padding/1 0.254 ms 0.254 ms 2758 -OpenCV_BottomHat2D_Constant_Padding/1 0.255 ms 0.255 ms 2740 -OpenCV_MorphGrad2D_Constant_Padding/1 0.251 ms 0.251 ms 2779 -OpenCV_Dilate2D_Constant_Padding/1 0.135 ms 0.135 ms 5176 +Eigen_Convolve2D/1 35.9 ms 35.9 ms 19 +MLIR_Conv2D/1 124 ms 124 ms 6 +Buddy_Conv2D/1 4.04 ms 4.04 ms 174 +Buddy_Corr2D_Constant_Padding/1 8.10 ms 8.10 ms 86 +OpenCV_Filter2D_Constant_Padding/1 6.00 ms 6.00 ms 115 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.149 ms 0.149 ms 4686 +Buddy_Resize2D_Bilinear_Interpolation/1 0.274 ms 0.274 ms 2562 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 101826 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47952 
+Buddy_Erosion2D_Constant_Padding/1 0.242 ms 0.242 ms 2816 +Buddy_Dilation2D_Constant_Padding/1 0.243 ms 0.243 ms 2869 +Buddy_Opening2D_Constant_Padding/1 0.387 ms 0.387 ms 1716 +Buddy_Closing2D_Constant_Padding/1 0.394 ms 0.394 ms 1845 +Buddy_TopHat2D_Constant_Padding/1 1.03 ms 1.03 ms 681 +Buddy_BottomHat2D_Constant_Padding/1 0.998 ms 0.998 ms 743 +OpenCV_Erode2D_Constant_Padding/1 0.144 ms 0.144 ms 4848 +OpenCV_Opening2D_Constant_Padding/1 0.232 ms 0.232 ms 3034 +OpenCV_Closing2D_Constant_Padding/1 0.232 ms 0.232 ms 3013 +OpenCV_TopHat2D_Constant_Padding/1 0.264 ms 0.264 ms 2605 +OpenCV_BottomHat2D_Constant_Padding/1 0.266 ms 0.266 ms 2599 +OpenCV_MorphGrad2D_Constant_Padding/1 0.271 ms 0.271 ms 2579 +OpenCV_Dilate2D_Constant_Padding/1 0.144 ms 0.144 ms 4823 Saved PNG file. Saved PNG file. Saved PNG file. diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json index 6c942dba..2251ffdf 100644 --- a/test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json +++ b/test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-06-01T10:09:04+00:00", + "date": "2025-09-07T13:12:25+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.00488,1.14209,1.87793], + "load_avg": [2.86768,3.38574,5.17383], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 20, - "real_time": 3.4315383434295654e+01, - "cpu_time": 3.4314020649999989e+01, + "iterations": 19, + "real_time": 3.6201816836470051e+01, + "cpu_time": 3.6201301105263163e+01, "time_unit": "ms" }, { @@ -60,8 +60,8 @@ 
"repetition_index": 0, "threads": 1, "iterations": 6, - "real_time": 1.1923250618080299e+02, - "cpu_time": 1.1923025466666671e+02, + "real_time": 1.2358936294913292e+02, + "cpu_time": 1.2358072799999997e+02, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 176, - "real_time": 3.9768879704008047e+00, - "cpu_time": 3.9767473920454539e+00, + "iterations": 164, + "real_time": 4.2949264369359827e+00, + "cpu_time": 4.2947466402439005e+00, "time_unit": "ms" }, { @@ -87,9 +87,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 90, - "real_time": 7.7952975821163921e+00, - "cpu_time": 7.7950874333333360e+00, + "iterations": 87, + "real_time": 8.1035070035649444e+00, + "cpu_time": 8.1031966321839040e+00, "time_unit": "ms" }, { @@ -101,9 +101,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 119, - "real_time": 5.8853118954335946e+00, - "cpu_time": 5.8850751092436999e+00, + "iterations": 114, + "real_time": 6.0135007166025929e+00, + "cpu_time": 6.0134500877193009e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4830, - "real_time": 1.4269068127586729e-01, - "cpu_time": 1.4268681076604545e-01, + "iterations": 4610, + "real_time": 1.5021250573130338e-01, + "cpu_time": 1.5020438633405644e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2690, - "real_time": 2.6056179049732958e-01, - "cpu_time": 2.6055384832713774e-01, + "iterations": 2555, + "real_time": 2.7383145549992527e-01, + "cpu_time": 2.7382016086105654e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 105110, - "real_time": 6.6555711116360709e-03, - "cpu_time": 6.6552045476167832e-03, + "iterations": 101810, + "real_time": 6.8629608209563337e-03, + "cpu_time": 6.8627586877516959e-03, "time_unit": "ms" }, { 
@@ -157,9 +157,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 49449, - "real_time": 1.4160061664736937e-02, - "cpu_time": 1.4159483306032465e-02, + "iterations": 48007, + "real_time": 1.4609261997405542e-02, + "cpu_time": 1.4608820255379421e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3196, - "real_time": 2.1433003843576648e-01, - "cpu_time": 2.1432159981226551e-01, + "iterations": 2937, + "real_time": 2.5447246713381783e-01, + "cpu_time": 2.5446095233231197e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3263, - "real_time": 2.1439825375588656e-01, - "cpu_time": 2.1438984094391658e-01, + "iterations": 2835, + "real_time": 2.5096868265032557e-01, + "cpu_time": 2.5096189135802438e-01, "time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2208, - "real_time": 3.1279467056383903e-01, - "cpu_time": 3.1279010869565232e-01, + "iterations": 1713, + "real_time": 4.0167628396552552e-01, + "cpu_time": 4.0166695913601819e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2187, - "real_time": 3.2674287674521879e-01, - "cpu_time": 3.2672890352080491e-01, + "iterations": 1732, + "real_time": 3.8730230733029564e-01, + "cpu_time": 3.8729183602771361e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 835, - "real_time": 8.0294330007658743e-01, - "cpu_time": 8.0290843592814365e-01, + "iterations": 678, + "real_time": 1.0340484875669169e+00, + "cpu_time": 1.0340169734513285e+00, "time_unit": "ms" }, { @@ -241,9 +241,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 832, - "real_time": 7.9833573321453655e-01, - "cpu_time": 7.9829145552884551e-01, + "iterations": 688, + "real_time": 
1.0315462307961181e+00, + "cpu_time": 1.0315082732558134e+00, "time_unit": "ms" }, { @@ -255,9 +255,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5076, - "real_time": 1.3770647691907706e-01, - "cpu_time": 1.3770291745468863e-01, + "iterations": 4897, + "real_time": 1.4402208728451424e-01, + "cpu_time": 1.4401725382887512e-01, "time_unit": "ms" }, { @@ -269,9 +269,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3051, - "real_time": 2.2880784661720635e-01, - "cpu_time": 2.2880403670927538e-01, + "iterations": 3175, + "real_time": 2.1922240811070121e-01, + "cpu_time": 2.1921639622047273e-01, "time_unit": "ms" }, { @@ -283,9 +283,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3037, - "real_time": 2.3033768049011036e-01, - "cpu_time": 2.3032893085281514e-01, + "iterations": 3139, + "real_time": 2.1930135176530421e-01, + "cpu_time": 2.1929385313794220e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2671, - "real_time": 2.6261604532950916e-01, - "cpu_time": 2.6260133545488590e-01, + "iterations": 2708, + "real_time": 2.5645486922436661e-01, + "cpu_time": 2.5644250147710440e-01, "time_unit": "ms" }, { @@ -311,9 +311,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2661, - "real_time": 2.6305766453513613e-01, - "cpu_time": 2.6304995978955348e-01, + "iterations": 2786, + "real_time": 2.5142605061464318e-01, + "cpu_time": 2.5141716403445868e-01, "time_unit": "ms" }, { @@ -325,9 +325,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2719, - "real_time": 2.5722212437300523e-01, - "cpu_time": 2.5721654983449854e-01, + "iterations": 2645, + "real_time": 2.6354777762254378e-01, + "cpu_time": 2.6354556105860183e-01, "time_unit": "ms" }, { @@ -339,9 +339,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5132, - "real_time": 1.3590039066906470e-01, - 
"cpu_time": 1.3589483982852715e-01, + "iterations": 4951, + "real_time": 1.4152701088597716e-01, + "cpu_time": 1.4151987901434010e-01, "time_unit": "ms" } ] diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log b/test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log index 86b65056..f84fcd1c 100644 --- a/test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log +++ b/test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log @@ -1,4 +1,4 @@ -2025-06-01T10:09:04+00:00 +2025-09-07T13:12:25+00:00 Running ./bin/image-processing-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,33 +6,33 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.14, 1.88 +Load Average: 2.87, 3.39, 5.17 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
-------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------------------- -Eigen_Convolve2D/1 34.3 ms 34.3 ms 20 -MLIR_Conv2D/1 119 ms 119 ms 6 -Buddy_Conv2D/1 3.98 ms 3.98 ms 176 -Buddy_Corr2D_Constant_Padding/1 7.80 ms 7.80 ms 90 -OpenCV_Filter2D_Constant_Padding/1 5.89 ms 5.89 ms 119 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.143 ms 0.143 ms 4830 -Buddy_Resize2D_Bilinear_Interpolation/1 0.261 ms 0.261 ms 2690 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 105110 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 49449 -Buddy_Erosion2D_Constant_Padding/1 0.214 ms 0.214 ms 3196 -Buddy_Dilation2D_Constant_Padding/1 0.214 ms 0.214 ms 3263 -Buddy_Opening2D_Constant_Padding/1 0.313 ms 0.313 ms 2208 -Buddy_Closing2D_Constant_Padding/1 0.327 ms 0.327 ms 2187 -Buddy_TopHat2D_Constant_Padding/1 0.803 ms 0.803 ms 835 -Buddy_BottomHat2D_Constant_Padding/1 0.798 ms 0.798 ms 832 -OpenCV_Erode2D_Constant_Padding/1 0.138 ms 0.138 ms 5076 -OpenCV_Opening2D_Constant_Padding/1 0.229 ms 0.229 ms 3051 -OpenCV_Closing2D_Constant_Padding/1 0.230 ms 0.230 ms 3037 -OpenCV_TopHat2D_Constant_Padding/1 0.263 ms 0.263 ms 2671 -OpenCV_BottomHat2D_Constant_Padding/1 0.263 ms 0.263 ms 2661 -OpenCV_MorphGrad2D_Constant_Padding/1 0.257 ms 0.257 ms 2719 -OpenCV_Dilate2D_Constant_Padding/1 0.136 ms 0.136 ms 5132 +Eigen_Convolve2D/1 36.2 ms 36.2 ms 19 +MLIR_Conv2D/1 124 ms 124 ms 6 +Buddy_Conv2D/1 4.29 ms 4.29 ms 164 +Buddy_Corr2D_Constant_Padding/1 8.10 ms 8.10 ms 87 +OpenCV_Filter2D_Constant_Padding/1 6.01 ms 6.01 ms 114 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.150 ms 0.150 ms 4610 +Buddy_Resize2D_Bilinear_Interpolation/1 0.274 ms 0.274 ms 2555 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 101810 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 48007 
+Buddy_Erosion2D_Constant_Padding/1 0.254 ms 0.254 ms 2937 +Buddy_Dilation2D_Constant_Padding/1 0.251 ms 0.251 ms 2835 +Buddy_Opening2D_Constant_Padding/1 0.402 ms 0.402 ms 1713 +Buddy_Closing2D_Constant_Padding/1 0.387 ms 0.387 ms 1732 +Buddy_TopHat2D_Constant_Padding/1 1.03 ms 1.03 ms 678 +Buddy_BottomHat2D_Constant_Padding/1 1.03 ms 1.03 ms 688 +OpenCV_Erode2D_Constant_Padding/1 0.144 ms 0.144 ms 4897 +OpenCV_Opening2D_Constant_Padding/1 0.219 ms 0.219 ms 3175 +OpenCV_Closing2D_Constant_Padding/1 0.219 ms 0.219 ms 3139 +OpenCV_TopHat2D_Constant_Padding/1 0.256 ms 0.256 ms 2708 +OpenCV_BottomHat2D_Constant_Padding/1 0.251 ms 0.251 ms 2786 +OpenCV_MorphGrad2D_Constant_Padding/1 0.264 ms 0.264 ms 2645 +OpenCV_Dilate2D_Constant_Padding/1 0.142 ms 0.142 ms 4951 Saved PNG file. Saved PNG file. Saved PNG file. diff --git a/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json index 7b43959b..fbe2c35c 100644 --- a/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json +++ b/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-06-01T10:03:44+00:00", + "date": "2025-09-07T13:06:58+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.01025,1.3501,2.23389], + "load_avg": [2.79346,4.37109,6.12695], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 148, - "real_time": 4.7447106272384927e+00, - "cpu_time": 4.7446774594594583e+00, + "iterations": 140, + "real_time": 5.0267200917005539e+00, + "cpu_time": 5.0265640428571432e+00, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, 
"repetition_index": 0, "threads": 1, - "iterations": 97, - "real_time": 7.1993130691272693e+00, - "cpu_time": 7.1992214536082475e+00, + "iterations": 91, + "real_time": 7.6496493849125535e+00, + "cpu_time": 7.6493698021978016e+00, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 994, - "real_time": 7.0498319121613584e-01, - "cpu_time": 7.0496927162977874e-01, + "iterations": 1044, + "real_time": 7.0535207802422661e-01, + "cpu_time": 7.0532346839080440e-01, "time_unit": "ms" }, { @@ -87,9 +87,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 652, - "real_time": 1.0683491848911977e+00, - "cpu_time": 1.0683190337423316e+00, + "iterations": 603, + "real_time": 1.1493037354392595e+00, + "cpu_time": 1.1492314079601984e+00, "time_unit": "ms" }, { @@ -101,9 +101,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 376, - "real_time": 1.8636996540775959e+00, - "cpu_time": 1.8636467952127671e+00, + "iterations": 351, + "real_time": 1.9643567396365000e+00, + "cpu_time": 1.9642499658119676e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4854, - "real_time": 1.4280247857750419e-01, - "cpu_time": 1.4279905850844668e-01, + "iterations": 4666, + "real_time": 1.4894739499150353e-01, + "cpu_time": 1.4894085683669100e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2692, - "real_time": 2.6015500866020591e-01, - "cpu_time": 2.6014284992570597e-01, + "iterations": 2568, + "real_time": 2.7369540561907391e-01, + "cpu_time": 2.7368365965732100e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 105153, - "real_time": 6.6558956463923079e-03, - "cpu_time": 6.6557461983966244e-03, + "iterations": 100789, + "real_time": 6.8543536302857408e-03, + "cpu_time": 
6.8541224836043569e-03, "time_unit": "ms" }, { @@ -157,9 +157,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 49887, - "real_time": 1.4016378462514136e-02, - "cpu_time": 1.4016046244512574e-02, + "iterations": 48298, + "real_time": 1.4474783446770128e-02, + "cpu_time": 1.4474468797879825e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3272, - "real_time": 2.1622067517748964e-01, - "cpu_time": 2.1621826436430350e-01, + "iterations": 2759, + "real_time": 2.4659659722958324e-01, + "cpu_time": 2.4658607828923515e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3249, - "real_time": 2.1524323226764189e-01, - "cpu_time": 2.1523652816251135e-01, + "iterations": 2891, + "real_time": 2.4580984498464042e-01, + "cpu_time": 2.4580015496368032e-01, "time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2259, - "real_time": 3.1131270476388107e-01, - "cpu_time": 3.1130147410358577e-01, + "iterations": 1723, + "real_time": 4.0453681489858112e-01, + "cpu_time": 4.0451324260011656e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2229, - "real_time": 3.0684158683973023e-01, - "cpu_time": 3.0682693808882905e-01, + "iterations": 1732, + "real_time": 4.1367237694888415e-01, + "cpu_time": 4.1365702655889142e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 858, - "real_time": 7.7721029780544604e-01, - "cpu_time": 7.7715368881118829e-01, + "iterations": 638, + "real_time": 1.0285404420291369e+00, + "cpu_time": 1.0285238934169287e+00, "time_unit": "ms" }, { @@ -241,9 +241,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 831, - "real_time": 7.6740273643199863e-01, - "cpu_time": 7.6736787845968901e-01, + 
"iterations": 644, + "real_time": 1.0358063990007276e+00, + "cpu_time": 1.0357866428571438e+00, "time_unit": "ms" }, { @@ -255,9 +255,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5114, - "real_time": 1.3575900606825617e-01, - "cpu_time": 1.3575761967149003e-01, + "iterations": 4822, + "real_time": 1.4633293893637869e-01, + "cpu_time": 1.4632941041061789e-01, "time_unit": "ms" }, { @@ -269,9 +269,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3131, - "real_time": 2.2333534568291205e-01, - "cpu_time": 2.2333002331523552e-01, + "iterations": 2981, + "real_time": 2.2754417554278855e-01, + "cpu_time": 2.2754064776920505e-01, "time_unit": "ms" }, { @@ -283,9 +283,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3149, - "real_time": 2.2213427404934885e-01, - "cpu_time": 2.2212822959669751e-01, + "iterations": 3123, + "real_time": 2.2576934456100001e-01, + "cpu_time": 2.2575589721421732e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2744, - "real_time": 2.5495233790780641e-01, - "cpu_time": 2.5493706669096206e-01, + "iterations": 2693, + "real_time": 2.6050619877849951e-01, + "cpu_time": 2.6050195915336039e-01, "time_unit": "ms" }, { @@ -311,9 +311,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2738, - "real_time": 2.5553990242536090e-01, - "cpu_time": 2.5553167786705649e-01, + "iterations": 2699, + "real_time": 2.5437106994312308e-01, + "cpu_time": 2.5436423193775537e-01, "time_unit": "ms" }, { @@ -325,9 +325,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2822, - "real_time": 2.4826389233813398e-01, - "cpu_time": 2.4825289546420984e-01, + "iterations": 2677, + "real_time": 2.6176411200691879e-01, + "cpu_time": 2.6176175046694000e-01, "time_unit": "ms" }, { @@ -339,9 +339,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5112, - 
"real_time": 1.3681590347222880e-01, - "cpu_time": 1.3681292938184636e-01, + "iterations": 4871, + "real_time": 1.4305631132053415e-01, + "cpu_time": 1.4305312009854276e-01, "time_unit": "ms" } ] diff --git a/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log b/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log index e18829e1..82b87d75 100644 --- a/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log +++ b/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log @@ -1,4 +1,4 @@ -2025-06-01T10:03:44+00:00 +2025-09-07T13:06:58+00:00 Running ./bin/image-processing-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,33 +6,33 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.01, 1.35, 2.23 +Load Average: 2.79, 4.37, 6.13 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
-------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------------------- -Eigen_Convolve2D/1 4.74 ms 4.74 ms 148 -MLIR_Conv2D/1 7.20 ms 7.20 ms 97 -Buddy_Conv2D/1 0.705 ms 0.705 ms 994 -Buddy_Corr2D_Constant_Padding/1 1.07 ms 1.07 ms 652 -OpenCV_Filter2D_Constant_Padding/1 1.86 ms 1.86 ms 376 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.143 ms 0.143 ms 4854 -Buddy_Resize2D_Bilinear_Interpolation/1 0.260 ms 0.260 ms 2692 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 105153 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 49887 -Buddy_Erosion2D_Constant_Padding/1 0.216 ms 0.216 ms 3272 -Buddy_Dilation2D_Constant_Padding/1 0.215 ms 0.215 ms 3249 -Buddy_Opening2D_Constant_Padding/1 0.311 ms 0.311 ms 2259 -Buddy_Closing2D_Constant_Padding/1 0.307 ms 0.307 ms 2229 -Buddy_TopHat2D_Constant_Padding/1 0.777 ms 0.777 ms 858 -Buddy_BottomHat2D_Constant_Padding/1 0.767 ms 0.767 ms 831 -OpenCV_Erode2D_Constant_Padding/1 0.136 ms 0.136 ms 5114 -OpenCV_Opening2D_Constant_Padding/1 0.223 ms 0.223 ms 3131 -OpenCV_Closing2D_Constant_Padding/1 0.222 ms 0.222 ms 3149 -OpenCV_TopHat2D_Constant_Padding/1 0.255 ms 0.255 ms 2744 -OpenCV_BottomHat2D_Constant_Padding/1 0.256 ms 0.256 ms 2738 -OpenCV_MorphGrad2D_Constant_Padding/1 0.248 ms 0.248 ms 2822 -OpenCV_Dilate2D_Constant_Padding/1 0.137 ms 0.137 ms 5112 +Eigen_Convolve2D/1 5.03 ms 5.03 ms 140 +MLIR_Conv2D/1 7.65 ms 7.65 ms 91 +Buddy_Conv2D/1 0.705 ms 0.705 ms 1044 +Buddy_Corr2D_Constant_Padding/1 1.15 ms 1.15 ms 603 +OpenCV_Filter2D_Constant_Padding/1 1.96 ms 1.96 ms 351 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.149 ms 0.149 ms 4666 +Buddy_Resize2D_Bilinear_Interpolation/1 0.274 ms 0.274 ms 2568 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 100789 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 48298 
+Buddy_Erosion2D_Constant_Padding/1 0.247 ms 0.247 ms 2759 +Buddy_Dilation2D_Constant_Padding/1 0.246 ms 0.246 ms 2891 +Buddy_Opening2D_Constant_Padding/1 0.405 ms 0.405 ms 1723 +Buddy_Closing2D_Constant_Padding/1 0.414 ms 0.414 ms 1732 +Buddy_TopHat2D_Constant_Padding/1 1.03 ms 1.03 ms 638 +Buddy_BottomHat2D_Constant_Padding/1 1.04 ms 1.04 ms 644 +OpenCV_Erode2D_Constant_Padding/1 0.146 ms 0.146 ms 4822 +OpenCV_Opening2D_Constant_Padding/1 0.228 ms 0.228 ms 2981 +OpenCV_Closing2D_Constant_Padding/1 0.226 ms 0.226 ms 3123 +OpenCV_TopHat2D_Constant_Padding/1 0.261 ms 0.261 ms 2693 +OpenCV_BottomHat2D_Constant_Padding/1 0.254 ms 0.254 ms 2699 +OpenCV_MorphGrad2D_Constant_Padding/1 0.262 ms 0.262 ms 2677 +OpenCV_Dilate2D_Constant_Padding/1 0.143 ms 0.143 ms 4871 Saved PNG file. Saved PNG file. Saved PNG file. diff --git a/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json index eeafbc1a..e07a24a2 100644 --- a/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json +++ b/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-06-01T10:04:08+00:00", + "date": "2025-09-07T13:07:22+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.00537,1.3208,2.19971], + "load_avg": [2.99609,4.29199,6.05322], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 144, - "real_time": 4.8768702480528088e+00, - "cpu_time": 4.8767275416666651e+00, + "iterations": 139, + "real_time": 4.9621713140027985e+00, + "cpu_time": 4.9619625107913672e+00, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 
1, "repetition_index": 0, "threads": 1, - "iterations": 97, - "real_time": 7.2076135343804806e+00, - "cpu_time": 7.2073728865979385e+00, + "iterations": 93, + "real_time": 7.5804082456455433e+00, + "cpu_time": 7.5800841397849492e+00, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 988, - "real_time": 7.0670188257568756e-01, - "cpu_time": 7.0667706781376538e-01, + "iterations": 1031, + "real_time": 6.8261508335775822e-01, + "cpu_time": 6.8259750145489806e-01, "time_unit": "ms" }, { @@ -87,9 +87,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 668, - "real_time": 1.0503804350193746e+00, - "cpu_time": 1.0503256976047903e+00, + "iterations": 633, + "real_time": 1.1063351650467792e+00, + "cpu_time": 1.1062885023696678e+00, "time_unit": "ms" }, { @@ -101,9 +101,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 376, - "real_time": 1.8618261055188610e+00, - "cpu_time": 1.8617787287234049e+00, + "iterations": 357, + "real_time": 1.9599260861466246e+00, + "cpu_time": 1.9598406246498592e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4847, - "real_time": 1.4253342164861363e-01, - "cpu_time": 1.4253089168557878e-01, + "iterations": 4635, + "real_time": 1.4986117664792906e-01, + "cpu_time": 1.4983970463861931e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2676, - "real_time": 2.6066214687041994e-01, - "cpu_time": 2.6065746524663669e-01, + "iterations": 2549, + "real_time": 2.7376641481004071e-01, + "cpu_time": 2.7375588662220468e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 104914, - "real_time": 6.6579347712314094e-03, - "cpu_time": 6.6577343633833427e-03, + "iterations": 102375, + "real_time": 7.3848935128422739e-03, + "cpu_time": 
7.3845735286935292e-03, "time_unit": "ms" }, { @@ -157,9 +157,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 49862, - "real_time": 1.4023264916892000e-02, - "cpu_time": 1.4022960731619289e-02, + "iterations": 48287, + "real_time": 1.4458113028899603e-02, + "cpu_time": 1.4457668357943133e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3188, - "real_time": 2.1308184504378097e-01, - "cpu_time": 2.1307364052697600e-01, + "iterations": 2811, + "real_time": 2.5138499600231667e-01, + "cpu_time": 2.5137254215581684e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3259, - "real_time": 2.1614176719859540e-01, - "cpu_time": 2.1612986836452897e-01, + "iterations": 2642, + "real_time": 2.6127819555623344e-01, + "cpu_time": 2.6126649242997735e-01, "time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2184, - "real_time": 3.1719380228920080e-01, - "cpu_time": 3.1718530998168509e-01, + "iterations": 1671, + "real_time": 4.1013477868408876e-01, + "cpu_time": 4.1010871035308216e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2136, - "real_time": 3.1429035187967486e-01, - "cpu_time": 3.1427967602996248e-01, + "iterations": 1687, + "real_time": 4.3260783981697476e-01, + "cpu_time": 4.3259696087729665e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 814, - "real_time": 7.8621265965831955e-01, - "cpu_time": 7.8619211547911549e-01, + "iterations": 632, + "real_time": 1.0608510892319529e+00, + "cpu_time": 1.0608173591772148e+00, "time_unit": "ms" }, { @@ -241,9 +241,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 847, - "real_time": 7.9931024739072343e-01, - "cpu_time": 7.9924577449822931e-01, + 
"iterations": 650, + "real_time": 1.0382979993636792e+00, + "cpu_time": 1.0382782784615385e+00, "time_unit": "ms" }, { @@ -255,9 +255,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5040, - "real_time": 1.3869987608539680e-01, - "cpu_time": 1.3869627519841246e-01, + "iterations": 4927, + "real_time": 1.4230332964691400e-01, + "cpu_time": 1.4229967972397004e-01, "time_unit": "ms" }, { @@ -269,9 +269,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3163, - "real_time": 2.2131044840439565e-01, - "cpu_time": 2.2130652987669977e-01, + "iterations": 3177, + "real_time": 2.2380305398437336e-01, + "cpu_time": 2.2379998048473357e-01, "time_unit": "ms" }, { @@ -283,9 +283,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3197, - "real_time": 2.1893111420844844e-01, - "cpu_time": 2.1892508163903554e-01, + "iterations": 3189, + "real_time": 2.2100701731886374e-01, + "cpu_time": 2.2100314989024730e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2741, - "real_time": 2.5511631760467623e-01, - "cpu_time": 2.5510120357533728e-01, + "iterations": 2722, + "real_time": 2.5795674680962799e-01, + "cpu_time": 2.5794806943423992e-01, "time_unit": "ms" }, { @@ -311,9 +311,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2735, - "real_time": 2.5600316360511116e-01, - "cpu_time": 2.5599668336380238e-01, + "iterations": 2649, + "real_time": 2.6164399358001283e-01, + "cpu_time": 2.6163493733484300e-01, "time_unit": "ms" }, { @@ -325,9 +325,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2817, - "real_time": 2.4800523348552012e-01, - "cpu_time": 2.4799882215122357e-01, + "iterations": 2636, + "real_time": 2.6538427240796081e-01, + "cpu_time": 2.6537967336874019e-01, "time_unit": "ms" }, { @@ -339,9 +339,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5127, - 
"real_time": 1.3664415676923483e-01, - "cpu_time": 1.3663955119953172e-01, + "iterations": 4833, + "real_time": 1.4401911400426576e-01, + "cpu_time": 1.4401146348023988e-01, "time_unit": "ms" } ] diff --git a/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log b/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log index f9f26c26..8d5da69b 100644 --- a/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log +++ b/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log @@ -1,4 +1,4 @@ -2025-06-01T10:04:08+00:00 +2025-09-07T13:07:22+00:00 Running ./bin/image-processing-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,33 +6,33 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.01, 1.32, 2.20 +Load Average: 3.00, 4.29, 6.05 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
-------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------------------- -Eigen_Convolve2D/1 4.88 ms 4.88 ms 144 -MLIR_Conv2D/1 7.21 ms 7.21 ms 97 -Buddy_Conv2D/1 0.707 ms 0.707 ms 988 -Buddy_Corr2D_Constant_Padding/1 1.05 ms 1.05 ms 668 -OpenCV_Filter2D_Constant_Padding/1 1.86 ms 1.86 ms 376 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.143 ms 0.143 ms 4847 -Buddy_Resize2D_Bilinear_Interpolation/1 0.261 ms 0.261 ms 2676 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 104914 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 49862 -Buddy_Erosion2D_Constant_Padding/1 0.213 ms 0.213 ms 3188 -Buddy_Dilation2D_Constant_Padding/1 0.216 ms 0.216 ms 3259 -Buddy_Opening2D_Constant_Padding/1 0.317 ms 0.317 ms 2184 -Buddy_Closing2D_Constant_Padding/1 0.314 ms 0.314 ms 2136 -Buddy_TopHat2D_Constant_Padding/1 0.786 ms 0.786 ms 814 -Buddy_BottomHat2D_Constant_Padding/1 0.799 ms 0.799 ms 847 -OpenCV_Erode2D_Constant_Padding/1 0.139 ms 0.139 ms 5040 -OpenCV_Opening2D_Constant_Padding/1 0.221 ms 0.221 ms 3163 -OpenCV_Closing2D_Constant_Padding/1 0.219 ms 0.219 ms 3197 -OpenCV_TopHat2D_Constant_Padding/1 0.255 ms 0.255 ms 2741 -OpenCV_BottomHat2D_Constant_Padding/1 0.256 ms 0.256 ms 2735 -OpenCV_MorphGrad2D_Constant_Padding/1 0.248 ms 0.248 ms 2817 -OpenCV_Dilate2D_Constant_Padding/1 0.137 ms 0.137 ms 5127 +Eigen_Convolve2D/1 4.96 ms 4.96 ms 139 +MLIR_Conv2D/1 7.58 ms 7.58 ms 93 +Buddy_Conv2D/1 0.683 ms 0.683 ms 1031 +Buddy_Corr2D_Constant_Padding/1 1.11 ms 1.11 ms 633 +OpenCV_Filter2D_Constant_Padding/1 1.96 ms 1.96 ms 357 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.150 ms 0.150 ms 4635 +Buddy_Resize2D_Bilinear_Interpolation/1 0.274 ms 0.274 ms 2549 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 102375 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 48287 
+Buddy_Erosion2D_Constant_Padding/1 0.251 ms 0.251 ms 2811 +Buddy_Dilation2D_Constant_Padding/1 0.261 ms 0.261 ms 2642 +Buddy_Opening2D_Constant_Padding/1 0.410 ms 0.410 ms 1671 +Buddy_Closing2D_Constant_Padding/1 0.433 ms 0.433 ms 1687 +Buddy_TopHat2D_Constant_Padding/1 1.06 ms 1.06 ms 632 +Buddy_BottomHat2D_Constant_Padding/1 1.04 ms 1.04 ms 650 +OpenCV_Erode2D_Constant_Padding/1 0.142 ms 0.142 ms 4927 +OpenCV_Opening2D_Constant_Padding/1 0.224 ms 0.224 ms 3177 +OpenCV_Closing2D_Constant_Padding/1 0.221 ms 0.221 ms 3189 +OpenCV_TopHat2D_Constant_Padding/1 0.258 ms 0.258 ms 2722 +OpenCV_BottomHat2D_Constant_Padding/1 0.262 ms 0.262 ms 2649 +OpenCV_MorphGrad2D_Constant_Padding/1 0.265 ms 0.265 ms 2636 +OpenCV_Dilate2D_Constant_Padding/1 0.144 ms 0.144 ms 4833 Saved PNG file. Saved PNG file. Saved PNG file. diff --git a/test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json index 3a96a344..494e78a9 100644 --- a/test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json +++ b/test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-06-01T10:04:31+00:00", + "date": "2025-09-07T13:07:46+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.00342,1.29932,2.17285], + "load_avg": [3.30029,4.25293,5.99219], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 60, - "real_time": 1.1527189146727324e+01, - "cpu_time": 1.1526637466666667e+01, + "iterations": 58, + "real_time": 1.2046421396321264e+01, + "cpu_time": 1.2045839844827588e+01, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, 
"threads": 1, - "iterations": 24, - "real_time": 2.9093050320322316e+01, - "cpu_time": 2.9092513625000009e+01, + "iterations": 23, + "real_time": 3.0488935015771698e+01, + "cpu_time": 3.0488128913043482e+01, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 343, - "real_time": 2.0371641713919515e+00, - "cpu_time": 2.0370861661807580e+00, + "iterations": 311, + "real_time": 2.2170821450339253e+00, + "cpu_time": 2.2169166109324756e+00, "time_unit": "ms" }, { @@ -87,9 +87,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 400, - "real_time": 1.7372224666178226e+00, - "cpu_time": 1.7371669650000010e+00, + "iterations": 361, + "real_time": 1.9300536064229843e+00, + "cpu_time": 1.9299647423822710e+00, "time_unit": "ms" }, { @@ -101,9 +101,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 261, - "real_time": 2.6778692086994420e+00, - "cpu_time": 2.6778029348658996e+00, + "iterations": 245, + "real_time": 2.8560382979256764e+00, + "cpu_time": 2.8558862775510190e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4858, - "real_time": 1.4252335135278607e-01, - "cpu_time": 1.4251939584191020e-01, + "iterations": 4623, + "real_time": 1.4924907078996716e-01, + "cpu_time": 1.4924138178671856e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2687, - "real_time": 2.6088196868829050e-01, - "cpu_time": 2.6087599590621513e-01, + "iterations": 2574, + "real_time": 2.7157393339862174e-01, + "cpu_time": 2.7156123815073813e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 104992, - "real_time": 6.6678219976420809e-03, - "cpu_time": 6.6676330577567803e-03, + "iterations": 101787, + "real_time": 6.8410452289965062e-03, + "cpu_time": 6.8409221511587948e-03, "time_unit": "ms" }, 
{ @@ -157,9 +157,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 49870, - "real_time": 1.4044699138275052e-02, - "cpu_time": 1.4044414618006824e-02, + "iterations": 48536, + "real_time": 1.4426544273662984e-02, + "cpu_time": 1.4426188993736632e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3259, - "real_time": 2.1626527674653920e-01, - "cpu_time": 2.1625656489720799e-01, + "iterations": 2911, + "real_time": 2.5185101356182016e-01, + "cpu_time": 2.5184438096873979e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3239, - "real_time": 2.1562600803342119e-01, - "cpu_time": 2.1561170608212396e-01, + "iterations": 2915, + "real_time": 2.4232492489839294e-01, + "cpu_time": 2.4231269228130314e-01, "time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2223, - "real_time": 3.2025049270888573e-01, - "cpu_time": 3.2023937067026542e-01, + "iterations": 1864, + "real_time": 3.9819089791741497e-01, + "cpu_time": 3.9817836051502159e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2211, - "real_time": 3.0800477782292152e-01, - "cpu_time": 3.0799415920398016e-01, + "iterations": 1848, + "real_time": 3.8097190037692263e-01, + "cpu_time": 3.8095753138528204e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 836, - "real_time": 7.9134119558705096e-01, - "cpu_time": 7.9132698803827617e-01, + "iterations": 696, + "real_time": 9.8893719179363082e-01, + "cpu_time": 9.8892107614942482e-01, "time_unit": "ms" }, { @@ -241,9 +241,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 841, - "real_time": 8.0551134198726848e-01, - "cpu_time": 8.0547060642092871e-01, + "iterations": 672, + "real_time": 
9.8550423336703152e-01, + "cpu_time": 9.8546595089285938e-01, "time_unit": "ms" }, { @@ -255,9 +255,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5076, - "real_time": 1.3788279668494788e-01, - "cpu_time": 1.3788056402679286e-01, + "iterations": 4801, + "real_time": 1.5243401774588389e-01, + "cpu_time": 1.5243083399291801e-01, "time_unit": "ms" }, { @@ -269,9 +269,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3086, - "real_time": 2.2677640289995318e-01, - "cpu_time": 2.2677006156837323e-01, + "iterations": 3052, + "real_time": 2.2796939614169096e-01, + "cpu_time": 2.2795512254259492e-01, "time_unit": "ms" }, { @@ -283,9 +283,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3114, - "real_time": 2.2484443892767281e-01, - "cpu_time": 2.2483772286448303e-01, + "iterations": 3093, + "real_time": 2.3295791055483223e-01, + "cpu_time": 2.3295580375040367e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2653, - "real_time": 2.6417284279983089e-01, - "cpu_time": 2.6416585978137841e-01, + "iterations": 2685, + "real_time": 2.6288813108394488e-01, + "cpu_time": 2.6288156722532574e-01, "time_unit": "ms" }, { @@ -311,9 +311,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2674, - "real_time": 2.6183896513464922e-01, - "cpu_time": 2.6182971316379983e-01, + "iterations": 2626, + "real_time": 2.6564019032089381e-01, + "cpu_time": 2.6563777837014435e-01, "time_unit": "ms" }, { @@ -325,9 +325,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2741, - "real_time": 2.5503337451225211e-01, - "cpu_time": 2.5502575738781558e-01, + "iterations": 2529, + "real_time": 2.7600545458239312e-01, + "cpu_time": 2.7599608105970802e-01, "time_unit": "ms" }, { @@ -339,9 +339,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5067, - "real_time": 1.3808122899832517e-01, - 
"cpu_time": 1.3807692401815616e-01, + "iterations": 4868, + "real_time": 1.4438264488243333e-01, + "cpu_time": 1.4438140488907125e-01, "time_unit": "ms" } ] diff --git a/test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log b/test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log index 62938745..5b86f3a7 100644 --- a/test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log +++ b/test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log @@ -1,4 +1,4 @@ -2025-06-01T10:04:31+00:00 +2025-09-07T13:07:46+00:00 Running ./bin/image-processing-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,33 +6,33 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.30, 2.17 +Load Average: 3.30, 4.25, 5.99 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
-------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------------------- -Eigen_Convolve2D/1 11.5 ms 11.5 ms 60 -MLIR_Conv2D/1 29.1 ms 29.1 ms 24 -Buddy_Conv2D/1 2.04 ms 2.04 ms 343 -Buddy_Corr2D_Constant_Padding/1 1.74 ms 1.74 ms 400 -OpenCV_Filter2D_Constant_Padding/1 2.68 ms 2.68 ms 261 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.143 ms 0.143 ms 4858 -Buddy_Resize2D_Bilinear_Interpolation/1 0.261 ms 0.261 ms 2687 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 104992 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 49870 -Buddy_Erosion2D_Constant_Padding/1 0.216 ms 0.216 ms 3259 -Buddy_Dilation2D_Constant_Padding/1 0.216 ms 0.216 ms 3239 -Buddy_Opening2D_Constant_Padding/1 0.320 ms 0.320 ms 2223 -Buddy_Closing2D_Constant_Padding/1 0.308 ms 0.308 ms 2211 -Buddy_TopHat2D_Constant_Padding/1 0.791 ms 0.791 ms 836 -Buddy_BottomHat2D_Constant_Padding/1 0.806 ms 0.805 ms 841 -OpenCV_Erode2D_Constant_Padding/1 0.138 ms 0.138 ms 5076 -OpenCV_Opening2D_Constant_Padding/1 0.227 ms 0.227 ms 3086 -OpenCV_Closing2D_Constant_Padding/1 0.225 ms 0.225 ms 3114 -OpenCV_TopHat2D_Constant_Padding/1 0.264 ms 0.264 ms 2653 -OpenCV_BottomHat2D_Constant_Padding/1 0.262 ms 0.262 ms 2674 -OpenCV_MorphGrad2D_Constant_Padding/1 0.255 ms 0.255 ms 2741 -OpenCV_Dilate2D_Constant_Padding/1 0.138 ms 0.138 ms 5067 +Eigen_Convolve2D/1 12.0 ms 12.0 ms 58 +MLIR_Conv2D/1 30.5 ms 30.5 ms 23 +Buddy_Conv2D/1 2.22 ms 2.22 ms 311 +Buddy_Corr2D_Constant_Padding/1 1.93 ms 1.93 ms 361 +OpenCV_Filter2D_Constant_Padding/1 2.86 ms 2.86 ms 245 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.149 ms 0.149 ms 4623 +Buddy_Resize2D_Bilinear_Interpolation/1 0.272 ms 0.272 ms 2574 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 101787 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 48536 
+Buddy_Erosion2D_Constant_Padding/1 0.252 ms 0.252 ms 2911 +Buddy_Dilation2D_Constant_Padding/1 0.242 ms 0.242 ms 2915 +Buddy_Opening2D_Constant_Padding/1 0.398 ms 0.398 ms 1864 +Buddy_Closing2D_Constant_Padding/1 0.381 ms 0.381 ms 1848 +Buddy_TopHat2D_Constant_Padding/1 0.989 ms 0.989 ms 696 +Buddy_BottomHat2D_Constant_Padding/1 0.986 ms 0.985 ms 672 +OpenCV_Erode2D_Constant_Padding/1 0.152 ms 0.152 ms 4801 +OpenCV_Opening2D_Constant_Padding/1 0.228 ms 0.228 ms 3052 +OpenCV_Closing2D_Constant_Padding/1 0.233 ms 0.233 ms 3093 +OpenCV_TopHat2D_Constant_Padding/1 0.263 ms 0.263 ms 2685 +OpenCV_BottomHat2D_Constant_Padding/1 0.266 ms 0.266 ms 2626 +OpenCV_MorphGrad2D_Constant_Padding/1 0.276 ms 0.276 ms 2529 +OpenCV_Dilate2D_Constant_Padding/1 0.144 ms 0.144 ms 4868 Saved PNG file. Saved PNG file. Saved PNG file. diff --git a/test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json index 1f531150..7321fe74 100644 --- a/test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json +++ b/test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-06-01T10:04:55+00:00", + "date": "2025-09-07T13:08:11+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.05908,1.28955,2.14648], + "load_avg": [3.32373,4.18311,5.92236], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 61, - "real_time": 1.1576852715406261e+01, - "cpu_time": 1.1576403065573771e+01, + "iterations": 57, + "real_time": 1.2125408309593535e+01, + "cpu_time": 1.2124991000000001e+01, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, 
"repetition_index": 0, "threads": 1, - "iterations": 24, - "real_time": 2.8955480316653848e+01, - "cpu_time": 2.8955074333333339e+01, + "iterations": 23, + "real_time": 3.0467491596937180e+01, + "cpu_time": 3.0466511304347815e+01, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 337, - "real_time": 2.0792299345445207e+00, - "cpu_time": 2.0791793204747768e+00, + "iterations": 308, + "real_time": 2.2320119518931811e+00, + "cpu_time": 2.2319393344155842e+00, "time_unit": "ms" }, { @@ -87,9 +87,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 399, - "real_time": 1.7465614295917047e+00, - "cpu_time": 1.7465200451127822e+00, + "iterations": 374, + "real_time": 1.8465650133430001e+00, + "cpu_time": 1.8464962647058809e+00, "time_unit": "ms" }, { @@ -101,9 +101,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 261, - "real_time": 2.6776142832305698e+00, - "cpu_time": 2.6775001149425290e+00, + "iterations": 249, + "real_time": 2.7999833196281907e+00, + "cpu_time": 2.7998932971887567e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4856, - "real_time": 1.4251252004180159e-01, - "cpu_time": 1.4250735070016479e-01, + "iterations": 4622, + "real_time": 1.5002440166313480e-01, + "cpu_time": 1.5001801579402854e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2688, - "real_time": 2.6082554818241899e-01, - "cpu_time": 2.6081198883928558e-01, + "iterations": 2559, + "real_time": 2.7215578194799345e-01, + "cpu_time": 2.7214380500195384e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 104682, - "real_time": 6.6886387324274427e-03, - "cpu_time": 6.6884607382358017e-03, + "iterations": 101510, + "real_time": 6.8781714849361188e-03, + "cpu_time": 
6.8779688700620614e-03, "time_unit": "ms" }, { @@ -157,9 +157,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 49744, - "real_time": 1.4085311272764809e-02, - "cpu_time": 1.4084703180283075e-02, + "iterations": 48333, + "real_time": 1.4528235841763767e-02, + "cpu_time": 1.4527689549583108e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3240, - "real_time": 2.1877774658302465e-01, - "cpu_time": 2.1876813796296318e-01, + "iterations": 2975, + "real_time": 2.4345158654100754e-01, + "cpu_time": 2.4343695798319329e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3213, - "real_time": 2.1521158405100488e-01, - "cpu_time": 2.1520321693121730e-01, + "iterations": 2932, + "real_time": 2.4039734226091172e-01, + "cpu_time": 2.4038767871759889e-01, "time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2240, - "real_time": 3.0795027185896678e-01, - "cpu_time": 3.0794031473214295e-01, + "iterations": 1773, + "real_time": 3.8597488842034705e-01, + "cpu_time": 3.8596146587704461e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2269, - "real_time": 3.0903523125381605e-01, - "cpu_time": 3.0902282150727212e-01, + "iterations": 1877, + "real_time": 3.7591625749270968e-01, + "cpu_time": 3.7591039158231215e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 841, - "real_time": 8.1986287447062456e-01, - "cpu_time": 8.1985148275862063e-01, + "iterations": 708, + "real_time": 9.9595875537159750e-01, + "cpu_time": 9.9593150423728927e-01, "time_unit": "ms" }, { @@ -241,9 +241,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 846, - "real_time": 8.0027264360135897e-01, - "cpu_time": 8.0021359219858113e-01, + 
"iterations": 677, + "real_time": 9.7564913805689735e-01, + "cpu_time": 9.7561057311669175e-01, "time_unit": "ms" }, { @@ -255,9 +255,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5072, - "real_time": 1.3771428277110828e-01, - "cpu_time": 1.3771015950315443e-01, + "iterations": 4933, + "real_time": 1.4562633877082792e-01, + "cpu_time": 1.4562218649908790e-01, "time_unit": "ms" }, { @@ -269,9 +269,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3139, - "real_time": 2.2283822592370409e-01, - "cpu_time": 2.2282934246575339e-01, + "iterations": 3075, + "real_time": 2.2444998709166922e-01, + "cpu_time": 2.2444326016260174e-01, "time_unit": "ms" }, { @@ -283,9 +283,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3074, - "real_time": 2.2787362380739770e-01, - "cpu_time": 2.2786527260897943e-01, + "iterations": 3052, + "real_time": 2.3302362516502878e-01, + "cpu_time": 2.3301645871559634e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2680, - "real_time": 2.6127572115788711e-01, - "cpu_time": 2.6126877089552214e-01, + "iterations": 2673, + "real_time": 2.6300297809637224e-01, + "cpu_time": 2.6299144257388679e-01, "time_unit": "ms" }, { @@ -311,9 +311,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2676, - "real_time": 2.6178410588603562e-01, - "cpu_time": 2.6178113677129977e-01, + "iterations": 2683, + "real_time": 2.5624285049061701e-01, + "cpu_time": 2.5623349832277248e-01, "time_unit": "ms" }, { @@ -325,9 +325,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2755, - "real_time": 2.5373509190909010e-01, - "cpu_time": 2.5373201778584381e-01, + "iterations": 2594, + "real_time": 2.6831575996065105e-01, + "cpu_time": 2.6831080185042405e-01, "time_unit": "ms" }, { @@ -339,9 +339,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5189, - 
"real_time": 1.3500250305920858e-01, - "cpu_time": 1.3499600346887636e-01, + "iterations": 4916, + "real_time": 1.4157932588150096e-01, + "cpu_time": 1.4157454414157802e-01, "time_unit": "ms" } ] diff --git a/test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log b/test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log index 60130c33..eb8ac2f9 100644 --- a/test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log +++ b/test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log @@ -1,4 +1,4 @@ -2025-06-01T10:04:55+00:00 +2025-09-07T13:08:11+00:00 Running ./bin/image-processing-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,33 +6,33 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.06, 1.29, 2.15 +Load Average: 3.32, 4.18, 5.92 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
-------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------------------- -Eigen_Convolve2D/1 11.6 ms 11.6 ms 61 -MLIR_Conv2D/1 29.0 ms 29.0 ms 24 -Buddy_Conv2D/1 2.08 ms 2.08 ms 337 -Buddy_Corr2D_Constant_Padding/1 1.75 ms 1.75 ms 399 -OpenCV_Filter2D_Constant_Padding/1 2.68 ms 2.68 ms 261 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.143 ms 0.143 ms 4856 -Buddy_Resize2D_Bilinear_Interpolation/1 0.261 ms 0.261 ms 2688 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 104682 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 49744 -Buddy_Erosion2D_Constant_Padding/1 0.219 ms 0.219 ms 3240 -Buddy_Dilation2D_Constant_Padding/1 0.215 ms 0.215 ms 3213 -Buddy_Opening2D_Constant_Padding/1 0.308 ms 0.308 ms 2240 -Buddy_Closing2D_Constant_Padding/1 0.309 ms 0.309 ms 2269 -Buddy_TopHat2D_Constant_Padding/1 0.820 ms 0.820 ms 841 -Buddy_BottomHat2D_Constant_Padding/1 0.800 ms 0.800 ms 846 -OpenCV_Erode2D_Constant_Padding/1 0.138 ms 0.138 ms 5072 -OpenCV_Opening2D_Constant_Padding/1 0.223 ms 0.223 ms 3139 -OpenCV_Closing2D_Constant_Padding/1 0.228 ms 0.228 ms 3074 -OpenCV_TopHat2D_Constant_Padding/1 0.261 ms 0.261 ms 2680 -OpenCV_BottomHat2D_Constant_Padding/1 0.262 ms 0.262 ms 2676 -OpenCV_MorphGrad2D_Constant_Padding/1 0.254 ms 0.254 ms 2755 -OpenCV_Dilate2D_Constant_Padding/1 0.135 ms 0.135 ms 5189 +Eigen_Convolve2D/1 12.1 ms 12.1 ms 57 +MLIR_Conv2D/1 30.5 ms 30.5 ms 23 +Buddy_Conv2D/1 2.23 ms 2.23 ms 308 +Buddy_Corr2D_Constant_Padding/1 1.85 ms 1.85 ms 374 +OpenCV_Filter2D_Constant_Padding/1 2.80 ms 2.80 ms 249 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.150 ms 0.150 ms 4622 +Buddy_Resize2D_Bilinear_Interpolation/1 0.272 ms 0.272 ms 2559 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 101510 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 48333 
+Buddy_Erosion2D_Constant_Padding/1 0.243 ms 0.243 ms 2975 +Buddy_Dilation2D_Constant_Padding/1 0.240 ms 0.240 ms 2932 +Buddy_Opening2D_Constant_Padding/1 0.386 ms 0.386 ms 1773 +Buddy_Closing2D_Constant_Padding/1 0.376 ms 0.376 ms 1877 +Buddy_TopHat2D_Constant_Padding/1 0.996 ms 0.996 ms 708 +Buddy_BottomHat2D_Constant_Padding/1 0.976 ms 0.976 ms 677 +OpenCV_Erode2D_Constant_Padding/1 0.146 ms 0.146 ms 4933 +OpenCV_Opening2D_Constant_Padding/1 0.224 ms 0.224 ms 3075 +OpenCV_Closing2D_Constant_Padding/1 0.233 ms 0.233 ms 3052 +OpenCV_TopHat2D_Constant_Padding/1 0.263 ms 0.263 ms 2673 +OpenCV_BottomHat2D_Constant_Padding/1 0.256 ms 0.256 ms 2683 +OpenCV_MorphGrad2D_Constant_Padding/1 0.268 ms 0.268 ms 2594 +OpenCV_Dilate2D_Constant_Padding/1 0.142 ms 0.142 ms 4916 Saved PNG file. Saved PNG file. Saved PNG file. diff --git a/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json index 0bb23e15..b8e4eae6 100644 --- a/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json +++ b/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-06-01T09:59:45+00:00", + "date": "2025-09-07T13:02:57+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.45068,1.74609,2.59521], + "load_avg": [5.82666,6.12891,7.06201], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 139, - "real_time": 5.0280020980955031e+00, - "cpu_time": 5.0279279856115116e+00, + "iterations": 136, + "real_time": 5.1284465007483959e+00, + "cpu_time": 5.1283712794117653e+00, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, 
"repetition_index": 0, "threads": 1, - "iterations": 95, - "real_time": 7.3824502919849593e+00, - "cpu_time": 7.3823169052631581e+00, + "iterations": 93, + "real_time": 7.5461328510315191e+00, + "cpu_time": 7.5460632903225813e+00, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1363, - "real_time": 5.2151041855640645e-01, - "cpu_time": 5.2149413939838585e-01, + "iterations": 1000, + "real_time": 5.0872633233666420e-01, + "cpu_time": 5.0871887600000010e-01, "time_unit": "ms" }, { @@ -87,9 +87,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 865, - "real_time": 8.1360158160587270e-01, - "cpu_time": 8.1358696878612724e-01, + "iterations": 841, + "real_time": 8.3388392586486948e-01, + "cpu_time": 8.3387196195005986e-01, "time_unit": "ms" }, { @@ -101,9 +101,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 548, - "real_time": 1.2772948834637221e+00, - "cpu_time": 1.2772620291970815e+00, + "iterations": 536, + "real_time": 1.3044935150711394e+00, + "cpu_time": 1.3044258451492536e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4815, - "real_time": 1.4578837239853690e-01, - "cpu_time": 1.4578228888888886e-01, + "iterations": 4609, + "real_time": 1.5180687935136616e-01, + "cpu_time": 1.5180155087871558e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2628, - "real_time": 2.6729297675362462e-01, - "cpu_time": 2.6726695243531190e-01, + "iterations": 2575, + "real_time": 2.7318556187222304e-01, + "cpu_time": 2.7316950174757265e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 102923, - "real_time": 6.8076673491945352e-03, - "cpu_time": 6.8075040564305363e-03, + "iterations": 101061, + "real_time": 6.8790974146093432e-03, + "cpu_time": 
6.8788971017504311e-03, "time_unit": "ms" }, { @@ -157,9 +157,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 48919, - "real_time": 1.4305153241039393e-02, - "cpu_time": 1.4304612502299703e-02, + "iterations": 48323, + "real_time": 1.4452015748223412e-02, + "cpu_time": 1.4451731452931325e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3130, - "real_time": 2.2225001761421989e-01, - "cpu_time": 2.2224536613418497e-01, + "iterations": 2784, + "real_time": 2.3488737723051473e-01, + "cpu_time": 2.3488115193965531e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3158, - "real_time": 2.2036093685297062e-01, - "cpu_time": 2.2035071089297040e-01, + "iterations": 2926, + "real_time": 2.3911561834755127e-01, + "cpu_time": 2.3910948803827733e-01, "time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2177, - "real_time": 3.3364478168434980e-01, - "cpu_time": 3.3363160312356482e-01, + "iterations": 1452, + "real_time": 4.3227852991789828e-01, + "cpu_time": 4.3227057300275434e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2114, - "real_time": 3.3768443804843989e-01, - "cpu_time": 3.3767108656575201e-01, + "iterations": 1785, + "real_time": 3.9728948799501945e-01, + "cpu_time": 3.9727350868347305e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 734, - "real_time": 8.9341425146572595e-01, - "cpu_time": 8.9337308855585695e-01, + "iterations": 643, + "real_time": 1.0138588029062767e+00, + "cpu_time": 1.0138225256609652e+00, "time_unit": "ms" }, { @@ -241,9 +241,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 761, - "real_time": 8.9906724402375038e-01, - "cpu_time": 8.9901021944809523e-01, + 
"iterations": 661, + "real_time": 1.0071998960852804e+00, + "cpu_time": 1.0071388003025714e+00, "time_unit": "ms" }, { @@ -255,9 +255,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5020, - "real_time": 1.3878680765628815e-01, - "cpu_time": 1.3878371175298815e-01, + "iterations": 4784, + "real_time": 1.4612457191689956e-01, + "cpu_time": 1.4612131396321087e-01, "time_unit": "ms" }, { @@ -269,9 +269,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3210, - "real_time": 2.1697144340317567e-01, - "cpu_time": 2.1696341401869182e-01, + "iterations": 3134, + "real_time": 2.2347150792456313e-01, + "cpu_time": 2.2346277632418668e-01, "time_unit": "ms" }, { @@ -283,9 +283,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3180, - "real_time": 2.2020956864521937e-01, - "cpu_time": 2.2020306729559663e-01, + "iterations": 3168, + "real_time": 2.2385514816363353e-01, + "cpu_time": 2.2384855082070679e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2639, - "real_time": 2.5489669915693824e-01, - "cpu_time": 2.5488007427055692e-01, + "iterations": 2747, + "real_time": 2.5661755387115620e-01, + "cpu_time": 2.5660987295231175e-01, "time_unit": "ms" }, { @@ -311,9 +311,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2732, - "real_time": 2.5621967152917369e-01, - "cpu_time": 2.5621515080527107e-01, + "iterations": 2718, + "real_time": 2.5766646329140819e-01, + "cpu_time": 2.5765858977189021e-01, "time_unit": "ms" }, { @@ -325,9 +325,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2827, - "real_time": 2.4727433998971851e-01, - "cpu_time": 2.4726836151397316e-01, + "iterations": 2631, + "real_time": 2.6644636065911903e-01, + "cpu_time": 2.6643485404789002e-01, "time_unit": "ms" }, { @@ -339,9 +339,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5123, - 
"real_time": 1.3593631128815267e-01, - "cpu_time": 1.3593265586570380e-01, + "iterations": 4760, + "real_time": 1.4655530656210514e-01, + "cpu_time": 1.4655188613445386e-01, "time_unit": "ms" } ] diff --git a/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log b/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log index 295916cf..c183c831 100644 --- a/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log +++ b/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log @@ -1,4 +1,4 @@ -2025-06-01T09:59:45+00:00 +2025-09-07T13:02:57+00:00 Running ./bin/image-processing-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,33 +6,33 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.45, 1.75, 2.60 +Load Average: 5.83, 6.13, 7.06 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
-------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------------------- -Eigen_Convolve2D/1 5.03 ms 5.03 ms 139 -MLIR_Conv2D/1 7.38 ms 7.38 ms 95 -Buddy_Conv2D/1 0.522 ms 0.521 ms 1363 -Buddy_Corr2D_Constant_Padding/1 0.814 ms 0.814 ms 865 -OpenCV_Filter2D_Constant_Padding/1 1.28 ms 1.28 ms 548 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.146 ms 0.146 ms 4815 -Buddy_Resize2D_Bilinear_Interpolation/1 0.267 ms 0.267 ms 2628 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 102923 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 48919 -Buddy_Erosion2D_Constant_Padding/1 0.222 ms 0.222 ms 3130 -Buddy_Dilation2D_Constant_Padding/1 0.220 ms 0.220 ms 3158 -Buddy_Opening2D_Constant_Padding/1 0.334 ms 0.334 ms 2177 -Buddy_Closing2D_Constant_Padding/1 0.338 ms 0.338 ms 2114 -Buddy_TopHat2D_Constant_Padding/1 0.893 ms 0.893 ms 734 -Buddy_BottomHat2D_Constant_Padding/1 0.899 ms 0.899 ms 761 -OpenCV_Erode2D_Constant_Padding/1 0.139 ms 0.139 ms 5020 -OpenCV_Opening2D_Constant_Padding/1 0.217 ms 0.217 ms 3210 -OpenCV_Closing2D_Constant_Padding/1 0.220 ms 0.220 ms 3180 -OpenCV_TopHat2D_Constant_Padding/1 0.255 ms 0.255 ms 2639 -OpenCV_BottomHat2D_Constant_Padding/1 0.256 ms 0.256 ms 2732 -OpenCV_MorphGrad2D_Constant_Padding/1 0.247 ms 0.247 ms 2827 -OpenCV_Dilate2D_Constant_Padding/1 0.136 ms 0.136 ms 5123 +Eigen_Convolve2D/1 5.13 ms 5.13 ms 136 +MLIR_Conv2D/1 7.55 ms 7.55 ms 93 +Buddy_Conv2D/1 0.509 ms 0.509 ms 1000 +Buddy_Corr2D_Constant_Padding/1 0.834 ms 0.834 ms 841 +OpenCV_Filter2D_Constant_Padding/1 1.30 ms 1.30 ms 536 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.152 ms 0.152 ms 4609 +Buddy_Resize2D_Bilinear_Interpolation/1 0.273 ms 0.273 ms 2575 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 101061 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 48323 
+Buddy_Erosion2D_Constant_Padding/1 0.235 ms 0.235 ms 2784 +Buddy_Dilation2D_Constant_Padding/1 0.239 ms 0.239 ms 2926 +Buddy_Opening2D_Constant_Padding/1 0.432 ms 0.432 ms 1452 +Buddy_Closing2D_Constant_Padding/1 0.397 ms 0.397 ms 1785 +Buddy_TopHat2D_Constant_Padding/1 1.01 ms 1.01 ms 643 +Buddy_BottomHat2D_Constant_Padding/1 1.01 ms 1.01 ms 661 +OpenCV_Erode2D_Constant_Padding/1 0.146 ms 0.146 ms 4784 +OpenCV_Opening2D_Constant_Padding/1 0.223 ms 0.223 ms 3134 +OpenCV_Closing2D_Constant_Padding/1 0.224 ms 0.224 ms 3168 +OpenCV_TopHat2D_Constant_Padding/1 0.257 ms 0.257 ms 2747 +OpenCV_BottomHat2D_Constant_Padding/1 0.258 ms 0.258 ms 2718 +OpenCV_MorphGrad2D_Constant_Padding/1 0.266 ms 0.266 ms 2631 +OpenCV_Dilate2D_Constant_Padding/1 0.147 ms 0.147 ms 4760 Saved PNG file. Saved PNG file. Saved PNG file. diff --git a/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json index 62d7e98e..f3967767 100644 --- a/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json +++ b/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-06-01T10:00:09+00:00", + "date": "2025-09-07T13:03:20+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.48438,1.73389,2.56738], + "load_avg": [4.87891,5.87744,6.95312], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 143, - "real_time": 4.8859805207360871e+00, - "cpu_time": 4.8858233356643366e+00, + "iterations": 140, + "real_time": 5.0184934533068111e+00, + "cpu_time": 5.0181792214285723e+00, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, 
"repetition_index": 0, "threads": 1, - "iterations": 97, - "real_time": 7.1891580621913533e+00, - "cpu_time": 7.1889744948453602e+00, + "iterations": 92, + "real_time": 7.6453953819430396e+00, + "cpu_time": 7.6449425760869589e+00, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1337, - "real_time": 5.2432643170221227e-01, - "cpu_time": 5.2429164921465965e-01, + "iterations": 1324, + "real_time": 5.3367181329388635e-01, + "cpu_time": 5.3365209365558919e-01, "time_unit": "ms" }, { @@ -87,9 +87,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 882, - "real_time": 7.9190808553206404e-01, - "cpu_time": 7.9188119614512487e-01, + "iterations": 790, + "real_time": 8.6351649576350098e-01, + "cpu_time": 8.6345711772151901e-01, "time_unit": "ms" }, { @@ -101,9 +101,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 561, - "real_time": 1.2481929319321365e+00, - "cpu_time": 1.2481498698752240e+00, + "iterations": 521, + "real_time": 1.3381383969893612e+00, + "cpu_time": 1.3381011746641083e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4818, - "real_time": 1.4271947871097246e-01, - "cpu_time": 1.4271616500622658e-01, + "iterations": 4506, + "real_time": 1.5247919416088979e-01, + "cpu_time": 1.5247161540168652e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2683, - "real_time": 2.6050025678440747e-01, - "cpu_time": 2.6049063473723449e-01, + "iterations": 2507, + "real_time": 2.7503761510046343e-01, + "cpu_time": 2.7502824491423988e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 104687, - "real_time": 6.6878359398252953e-03, - "cpu_time": 6.6876859686494008e-03, + "iterations": 101549, + "real_time": 6.8775821290780010e-03, + "cpu_time": 
6.8772612039508015e-03, "time_unit": "ms" }, { @@ -157,9 +157,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 49857, - "real_time": 1.4016524784845726e-02, - "cpu_time": 1.4016148605010314e-02, + "iterations": 47841, + "real_time": 1.4524636204003844e-02, + "cpu_time": 1.4524494575782268e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3251, - "real_time": 2.2090763433443072e-01, - "cpu_time": 2.2090116671793322e-01, + "iterations": 2772, + "real_time": 2.6418570520471873e-01, + "cpu_time": 2.6416973701298724e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3237, - "real_time": 2.1412889112062133e-01, - "cpu_time": 2.1412120729070164e-01, + "iterations": 2644, + "real_time": 2.6406940501098736e-01, + "cpu_time": 2.6405351172465935e-01, "time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2241, - "real_time": 3.2267528754400920e-01, - "cpu_time": 3.2266634225792068e-01, + "iterations": 1476, + "real_time": 4.5833709904656500e-01, + "cpu_time": 4.5832794173441710e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2271, - "real_time": 3.0766681856990435e-01, - "cpu_time": 3.0765447203875002e-01, + "iterations": 1514, + "real_time": 4.6325272719327809e-01, + "cpu_time": 4.6324274768824364e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 841, - "real_time": 8.0524559944478280e-01, - "cpu_time": 8.0521395362663450e-01, + "iterations": 614, + "real_time": 1.1151395738124847e+00, + "cpu_time": 1.1151286921824080e+00, "time_unit": "ms" }, { @@ -241,9 +241,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 846, - "real_time": 8.0883008456540162e-01, - "cpu_time": 8.0878250000000096e-01, + 
"iterations": 600, + "real_time": 1.1484713479876518e+00, + "cpu_time": 1.1484272233333332e+00, "time_unit": "ms" }, { @@ -255,9 +255,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5105, - "real_time": 1.3598477528097580e-01, - "cpu_time": 1.3598074338883434e-01, + "iterations": 4834, + "real_time": 1.4438382267064184e-01, + "cpu_time": 1.4438079209764174e-01, "time_unit": "ms" }, { @@ -269,9 +269,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3219, - "real_time": 2.1738641869025632e-01, - "cpu_time": 2.1737873221497292e-01, + "iterations": 3113, + "real_time": 2.2669639252904064e-01, + "cpu_time": 2.2668890459363947e-01, "time_unit": "ms" }, { @@ -283,9 +283,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3216, - "real_time": 2.1741356276698523e-01, - "cpu_time": 2.1740724844527404e-01, + "iterations": 2911, + "real_time": 2.4054079162669076e-01, + "cpu_time": 2.4053840467193477e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2710, - "real_time": 2.5812379768413812e-01, - "cpu_time": 2.5811859741697379e-01, + "iterations": 2646, + "real_time": 2.6420583400821829e-01, + "cpu_time": 2.6419529705215339e-01, "time_unit": "ms" }, { @@ -311,9 +311,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2740, - "real_time": 2.5566765888981574e-01, - "cpu_time": 2.5566491569343169e-01, + "iterations": 2629, + "real_time": 2.7253198341391033e-01, + "cpu_time": 2.7252830962343044e-01, "time_unit": "ms" }, { @@ -325,9 +325,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2803, - "real_time": 2.4951168754953085e-01, - "cpu_time": 2.4950610916874724e-01, + "iterations": 2630, + "real_time": 2.6534779613462239e-01, + "cpu_time": 2.6533501787072172e-01, "time_unit": "ms" }, { @@ -339,9 +339,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5105, - 
"real_time": 1.3737050993377151e-01, - "cpu_time": 1.3736774319294784e-01, + "iterations": 4812, + "real_time": 1.4677566591517091e-01, + "cpu_time": 1.4676936450540284e-01, "time_unit": "ms" } ] diff --git a/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log b/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log index 0e157215..62ca55c3 100644 --- a/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log +++ b/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log @@ -1,4 +1,4 @@ -2025-06-01T10:00:09+00:00 +2025-09-07T13:03:20+00:00 Running ./bin/image-processing-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,33 +6,33 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.48, 1.73, 2.57 +Load Average: 4.88, 5.88, 6.95 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
-------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------------------- -Eigen_Convolve2D/1 4.89 ms 4.89 ms 143 -MLIR_Conv2D/1 7.19 ms 7.19 ms 97 -Buddy_Conv2D/1 0.524 ms 0.524 ms 1337 -Buddy_Corr2D_Constant_Padding/1 0.792 ms 0.792 ms 882 -OpenCV_Filter2D_Constant_Padding/1 1.25 ms 1.25 ms 561 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.143 ms 0.143 ms 4818 -Buddy_Resize2D_Bilinear_Interpolation/1 0.261 ms 0.260 ms 2683 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 104687 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 49857 -Buddy_Erosion2D_Constant_Padding/1 0.221 ms 0.221 ms 3251 -Buddy_Dilation2D_Constant_Padding/1 0.214 ms 0.214 ms 3237 -Buddy_Opening2D_Constant_Padding/1 0.323 ms 0.323 ms 2241 -Buddy_Closing2D_Constant_Padding/1 0.308 ms 0.308 ms 2271 -Buddy_TopHat2D_Constant_Padding/1 0.805 ms 0.805 ms 841 -Buddy_BottomHat2D_Constant_Padding/1 0.809 ms 0.809 ms 846 -OpenCV_Erode2D_Constant_Padding/1 0.136 ms 0.136 ms 5105 -OpenCV_Opening2D_Constant_Padding/1 0.217 ms 0.217 ms 3219 -OpenCV_Closing2D_Constant_Padding/1 0.217 ms 0.217 ms 3216 -OpenCV_TopHat2D_Constant_Padding/1 0.258 ms 0.258 ms 2710 -OpenCV_BottomHat2D_Constant_Padding/1 0.256 ms 0.256 ms 2740 -OpenCV_MorphGrad2D_Constant_Padding/1 0.250 ms 0.250 ms 2803 -OpenCV_Dilate2D_Constant_Padding/1 0.137 ms 0.137 ms 5105 +Eigen_Convolve2D/1 5.02 ms 5.02 ms 140 +MLIR_Conv2D/1 7.65 ms 7.64 ms 92 +Buddy_Conv2D/1 0.534 ms 0.534 ms 1324 +Buddy_Corr2D_Constant_Padding/1 0.864 ms 0.863 ms 790 +OpenCV_Filter2D_Constant_Padding/1 1.34 ms 1.34 ms 521 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.152 ms 0.152 ms 4506 +Buddy_Resize2D_Bilinear_Interpolation/1 0.275 ms 0.275 ms 2507 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 101549 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47841 
+Buddy_Erosion2D_Constant_Padding/1 0.264 ms 0.264 ms 2772 +Buddy_Dilation2D_Constant_Padding/1 0.264 ms 0.264 ms 2644 +Buddy_Opening2D_Constant_Padding/1 0.458 ms 0.458 ms 1476 +Buddy_Closing2D_Constant_Padding/1 0.463 ms 0.463 ms 1514 +Buddy_TopHat2D_Constant_Padding/1 1.12 ms 1.12 ms 614 +Buddy_BottomHat2D_Constant_Padding/1 1.15 ms 1.15 ms 600 +OpenCV_Erode2D_Constant_Padding/1 0.144 ms 0.144 ms 4834 +OpenCV_Opening2D_Constant_Padding/1 0.227 ms 0.227 ms 3113 +OpenCV_Closing2D_Constant_Padding/1 0.241 ms 0.241 ms 2911 +OpenCV_TopHat2D_Constant_Padding/1 0.264 ms 0.264 ms 2646 +OpenCV_BottomHat2D_Constant_Padding/1 0.273 ms 0.273 ms 2629 +OpenCV_MorphGrad2D_Constant_Padding/1 0.265 ms 0.265 ms 2630 +OpenCV_Dilate2D_Constant_Padding/1 0.147 ms 0.147 ms 4812 Saved PNG file. Saved PNG file. Saved PNG file. diff --git a/test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json index c1db9850..c572ef69 100644 --- a/test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json +++ b/test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-06-01T10:00:33+00:00", + "date": "2025-09-07T13:03:44+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.31787,1.67383,2.52441], + "load_avg": [4.40625,5.69189,6.8623], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 143, - "real_time": 4.8869541501040228e+00, - "cpu_time": 4.8868488251748250e+00, + "iterations": 136, + "real_time": 5.1616181290763263e+00, + "cpu_time": 5.1615701838235291e+00, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, 
"repetition_index": 0, "threads": 1, - "iterations": 97, - "real_time": 7.1941099907319570e+00, - "cpu_time": 7.1940195670103098e+00, + "iterations": 91, + "real_time": 7.6719810674478719e+00, + "cpu_time": 7.6718306263736249e+00, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1334, - "real_time": 5.2251873713830066e-01, - "cpu_time": 5.2250321664167931e-01, + "iterations": 1318, + "real_time": 5.1680353381112787e-01, + "cpu_time": 5.1676989984825494e-01, "time_unit": "ms" }, { @@ -87,9 +87,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 882, - "real_time": 7.9283786223791619e-01, - "cpu_time": 7.9282752607709761e-01, + "iterations": 810, + "real_time": 8.7384458769250795e-01, + "cpu_time": 8.7382117160493800e-01, "time_unit": "ms" }, { @@ -101,9 +101,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 561, - "real_time": 1.2480702330881261e+00, - "cpu_time": 1.2480264402852053e+00, + "iterations": 527, + "real_time": 1.3300638073309550e+00, + "cpu_time": 1.3300308349146115e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4847, - "real_time": 1.4256667295804781e-01, - "cpu_time": 1.4255895791211062e-01, + "iterations": 4520, + "real_time": 1.5405326817942933e-01, + "cpu_time": 1.5404699181415932e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2679, - "real_time": 2.6033759623287595e-01, - "cpu_time": 2.6032852967525205e-01, + "iterations": 2534, + "real_time": 2.7796252385875608e-01, + "cpu_time": 2.7795281649565928e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 104555, - "real_time": 6.6839088878435033e-03, - "cpu_time": 6.6837356319640398e-03, + "iterations": 100621, + "real_time": 6.9713447932306287e-03, + "cpu_time": 
6.9711385197920878e-03, "time_unit": "ms" }, { @@ -157,9 +157,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 49894, - "real_time": 1.4026098685923638e-02, - "cpu_time": 1.4025601475127254e-02, + "iterations": 47677, + "real_time": 1.4668967632108515e-02, + "cpu_time": 1.4668751662227062e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3270, - "real_time": 2.1376049299852565e-01, - "cpu_time": 2.1375075779816491e-01, + "iterations": 2528, + "real_time": 2.6593122430900229e-01, + "cpu_time": 2.6592099723101265e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3269, - "real_time": 2.1365958981901981e-01, - "cpu_time": 2.1365636586112000e-01, + "iterations": 2586, + "real_time": 2.6167884803242397e-01, + "cpu_time": 2.6166388399071899e-01, "time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2113, - "real_time": 3.1973698017329782e-01, - "cpu_time": 3.1972719451017506e-01, + "iterations": 1531, + "real_time": 4.6423418723548027e-01, + "cpu_time": 4.6422537557152155e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2219, - "real_time": 3.0630169526152162e-01, - "cpu_time": 3.0629267507886482e-01, + "iterations": 1535, + "real_time": 4.6207869625635178e-01, + "cpu_time": 4.6206046188925093e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 863, - "real_time": 7.8096507961062123e-01, - "cpu_time": 7.8092313209733399e-01, + "iterations": 599, + "real_time": 1.1238463732953461e+00, + "cpu_time": 1.1238221803004993e+00, "time_unit": "ms" }, { @@ -241,9 +241,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 836, - "real_time": 7.9488013002076785e-01, - "cpu_time": 7.9486416148325389e-01, + 
"iterations": 647, + "real_time": 1.1313838029202574e+00, + "cpu_time": 1.1313273972179276e+00, "time_unit": "ms" }, { @@ -255,9 +255,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5175, - "real_time": 1.3511446327115026e-01, - "cpu_time": 1.3511111072463786e-01, + "iterations": 4801, + "real_time": 1.4477257687234552e-01, + "cpu_time": 1.4477115496771487e-01, "time_unit": "ms" }, { @@ -269,9 +269,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3222, - "real_time": 2.1722390704333303e-01, - "cpu_time": 2.1721783985102436e-01, + "iterations": 3080, + "real_time": 2.2534245455806906e-01, + "cpu_time": 2.2533414415584410e-01, "time_unit": "ms" }, { @@ -283,9 +283,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3153, - "real_time": 2.2210372104423221e-01, - "cpu_time": 2.2209552553123957e-01, + "iterations": 3006, + "real_time": 2.3400538780057897e-01, + "cpu_time": 2.3400324983366630e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2720, - "real_time": 2.5731744244694710e-01, - "cpu_time": 2.5730785477941137e-01, + "iterations": 2627, + "real_time": 2.6628348109928157e-01, + "cpu_time": 2.6627249143509668e-01, "time_unit": "ms" }, { @@ -311,9 +311,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2721, - "real_time": 2.5738097552055383e-01, - "cpu_time": 2.5736759573686152e-01, + "iterations": 2595, + "real_time": 2.7239328261055701e-01, + "cpu_time": 2.7238921310211944e-01, "time_unit": "ms" }, { @@ -325,9 +325,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2805, - "real_time": 2.4959671903740277e-01, - "cpu_time": 2.4958885418894797e-01, + "iterations": 2627, + "real_time": 2.6650840808649140e-01, + "cpu_time": 2.6649613247049775e-01, "time_unit": "ms" }, { @@ -339,9 +339,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5121, - 
"real_time": 1.3665625601104869e-01, - "cpu_time": 1.3665107752392106e-01, + "iterations": 4881, + "real_time": 1.4324055111107828e-01, + "cpu_time": 1.4323654189715235e-01, "time_unit": "ms" } ] diff --git a/test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log b/test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log index ca9c8f3b..b21eda9f 100644 --- a/test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log +++ b/test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log @@ -1,4 +1,4 @@ -2025-06-01T10:00:33+00:00 +2025-09-07T13:03:44+00:00 Running ./bin/image-processing-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,33 +6,33 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.32, 1.67, 2.52 +Load Average: 4.41, 5.69, 6.86 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
-------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------------------- -Eigen_Convolve2D/1 4.89 ms 4.89 ms 143 -MLIR_Conv2D/1 7.19 ms 7.19 ms 97 -Buddy_Conv2D/1 0.523 ms 0.523 ms 1334 -Buddy_Corr2D_Constant_Padding/1 0.793 ms 0.793 ms 882 -OpenCV_Filter2D_Constant_Padding/1 1.25 ms 1.25 ms 561 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.143 ms 0.143 ms 4847 -Buddy_Resize2D_Bilinear_Interpolation/1 0.260 ms 0.260 ms 2679 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 104555 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 49894 -Buddy_Erosion2D_Constant_Padding/1 0.214 ms 0.214 ms 3270 -Buddy_Dilation2D_Constant_Padding/1 0.214 ms 0.214 ms 3269 -Buddy_Opening2D_Constant_Padding/1 0.320 ms 0.320 ms 2113 -Buddy_Closing2D_Constant_Padding/1 0.306 ms 0.306 ms 2219 -Buddy_TopHat2D_Constant_Padding/1 0.781 ms 0.781 ms 863 -Buddy_BottomHat2D_Constant_Padding/1 0.795 ms 0.795 ms 836 -OpenCV_Erode2D_Constant_Padding/1 0.135 ms 0.135 ms 5175 -OpenCV_Opening2D_Constant_Padding/1 0.217 ms 0.217 ms 3222 -OpenCV_Closing2D_Constant_Padding/1 0.222 ms 0.222 ms 3153 -OpenCV_TopHat2D_Constant_Padding/1 0.257 ms 0.257 ms 2720 -OpenCV_BottomHat2D_Constant_Padding/1 0.257 ms 0.257 ms 2721 -OpenCV_MorphGrad2D_Constant_Padding/1 0.250 ms 0.250 ms 2805 -OpenCV_Dilate2D_Constant_Padding/1 0.137 ms 0.137 ms 5121 +Eigen_Convolve2D/1 5.16 ms 5.16 ms 136 +MLIR_Conv2D/1 7.67 ms 7.67 ms 91 +Buddy_Conv2D/1 0.517 ms 0.517 ms 1318 +Buddy_Corr2D_Constant_Padding/1 0.874 ms 0.874 ms 810 +OpenCV_Filter2D_Constant_Padding/1 1.33 ms 1.33 ms 527 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.154 ms 0.154 ms 4520 +Buddy_Resize2D_Bilinear_Interpolation/1 0.278 ms 0.278 ms 2534 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 100621 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47677 
+Buddy_Erosion2D_Constant_Padding/1 0.266 ms 0.266 ms 2528 +Buddy_Dilation2D_Constant_Padding/1 0.262 ms 0.262 ms 2586 +Buddy_Opening2D_Constant_Padding/1 0.464 ms 0.464 ms 1531 +Buddy_Closing2D_Constant_Padding/1 0.462 ms 0.462 ms 1535 +Buddy_TopHat2D_Constant_Padding/1 1.12 ms 1.12 ms 599 +Buddy_BottomHat2D_Constant_Padding/1 1.13 ms 1.13 ms 647 +OpenCV_Erode2D_Constant_Padding/1 0.145 ms 0.145 ms 4801 +OpenCV_Opening2D_Constant_Padding/1 0.225 ms 0.225 ms 3080 +OpenCV_Closing2D_Constant_Padding/1 0.234 ms 0.234 ms 3006 +OpenCV_TopHat2D_Constant_Padding/1 0.266 ms 0.266 ms 2627 +OpenCV_BottomHat2D_Constant_Padding/1 0.272 ms 0.272 ms 2595 +OpenCV_MorphGrad2D_Constant_Padding/1 0.267 ms 0.266 ms 2627 +OpenCV_Dilate2D_Constant_Padding/1 0.143 ms 0.143 ms 4881 Saved PNG file. Saved PNG file. Saved PNG file. diff --git a/test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json index b481f940..8d237431 100644 --- a/test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json +++ b/test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-06-01T10:00:56+00:00", + "date": "2025-09-07T13:04:08+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.22656,1.62939,2.49121], + "load_avg": [4.30176,5.58203,6.80029], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 144, - "real_time": 4.8671740935080585e+00, - "cpu_time": 4.8670024861111107e+00, + "iterations": 138, + "real_time": 5.1051492978265323e+00, + "cpu_time": 5.1049708043478264e+00, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, 
"repetition_index": 0, "threads": 1, - "iterations": 97, - "real_time": 7.1850729741386532e+00, - "cpu_time": 7.1848575360824745e+00, + "iterations": 91, + "real_time": 7.6753021023430668e+00, + "cpu_time": 7.6749714395604389e+00, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1322, - "real_time": 5.2583411916436662e-01, - "cpu_time": 5.2582321558245060e-01, + "iterations": 1299, + "real_time": 5.4009209707482586e-01, + "cpu_time": 5.4006453117782904e-01, "time_unit": "ms" }, { @@ -87,9 +87,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 885, - "real_time": 7.9312442596686084e-01, - "cpu_time": 7.9308901807909582e-01, + "iterations": 808, + "real_time": 8.6884597076637915e-01, + "cpu_time": 8.6882243193069264e-01, "time_unit": "ms" }, { @@ -101,9 +101,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 560, - "real_time": 1.2487932374434811e+00, - "cpu_time": 1.2487409732142858e+00, + "iterations": 521, + "real_time": 1.3466300567944540e+00, + "cpu_time": 1.3465901938579645e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4865, - "real_time": 1.4262197755529848e-01, - "cpu_time": 1.4261277903391573e-01, + "iterations": 4586, + "real_time": 1.5055361811869331e-01, + "cpu_time": 1.5055115263846494e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2697, - "real_time": 2.6017889065525024e-01, - "cpu_time": 2.6016967371153132e-01, + "iterations": 2536, + "real_time": 2.7593569418750352e-01, + "cpu_time": 2.7592326301261838e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 105155, - "real_time": 6.6622555692346331e-03, - "cpu_time": 6.6618292710760377e-03, + "iterations": 100865, + "real_time": 6.9434849884870160e-03, + "cpu_time": 
6.9433250483319220e-03, "time_unit": "ms" }, { @@ -157,9 +157,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 49883, - "real_time": 1.4039483654351604e-02, - "cpu_time": 1.4039219974740884e-02, + "iterations": 47835, + "real_time": 1.4630887664023569e-02, + "cpu_time": 1.4630675154175796e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3259, - "real_time": 2.1370667615581340e-01, - "cpu_time": 2.1369622583614614e-01, + "iterations": 2523, + "real_time": 2.7164263452181397e-01, + "cpu_time": 2.7162846452635753e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3259, - "real_time": 2.1333487355186004e-01, - "cpu_time": 2.1332873366063201e-01, + "iterations": 2535, + "real_time": 2.7691923243524524e-01, + "cpu_time": 2.7690100433925030e-01, "time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2258, - "real_time": 3.1496402605430968e-01, - "cpu_time": 3.1494664968999142e-01, + "iterations": 1341, + "real_time": 4.7276657433051122e-01, + "cpu_time": 4.7274863310961951e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2255, - "real_time": 3.1185760482848351e-01, - "cpu_time": 3.1184659645232815e-01, + "iterations": 1364, + "real_time": 4.9240803045611226e-01, + "cpu_time": 4.9239791862170046e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 854, - "real_time": 7.8316566161375134e-01, - "cpu_time": 7.8311624238875910e-01, + "iterations": 579, + "real_time": 1.1095030101366816e+00, + "cpu_time": 1.1094485198618311e+00, "time_unit": "ms" }, { @@ -241,9 +241,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 821, - "real_time": 7.8512378599626287e-01, - "cpu_time": 7.8508044214372708e-01, + 
"iterations": 687, + "real_time": 1.1041596737924795e+00, + "cpu_time": 1.1041230829694306e+00, "time_unit": "ms" }, { @@ -255,9 +255,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5119, - "real_time": 1.3652631023603623e-01, - "cpu_time": 1.3651533834733337e-01, + "iterations": 4899, + "real_time": 1.4196342961061584e-01, + "cpu_time": 1.4195862380077542e-01, "time_unit": "ms" }, { @@ -269,9 +269,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3167, - "real_time": 2.2093217465867745e-01, - "cpu_time": 2.2092883517524411e-01, + "iterations": 3199, + "real_time": 2.1802198937178477e-01, + "cpu_time": 2.1801989809315439e-01, "time_unit": "ms" }, { @@ -283,9 +283,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3139, - "real_time": 2.2298171546627055e-01, - "cpu_time": 2.2297460560688059e-01, + "iterations": 3190, + "real_time": 2.2261308091561249e-01, + "cpu_time": 2.2260142257053250e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2712, - "real_time": 2.5806632675889846e-01, - "cpu_time": 2.5805932227138600e-01, + "iterations": 2723, + "real_time": 2.6208069962129582e-01, + "cpu_time": 2.6207134373852359e-01, "time_unit": "ms" }, { @@ -311,9 +311,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2733, - "real_time": 2.5592657863177792e-01, - "cpu_time": 2.5591782729601126e-01, + "iterations": 2666, + "real_time": 2.6216383238126706e-01, + "cpu_time": 2.6214668942235603e-01, "time_unit": "ms" }, { @@ -325,9 +325,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2804, - "real_time": 2.4944995018452959e-01, - "cpu_time": 2.4944712589158421e-01, + "iterations": 2677, + "real_time": 2.6168456168196119e-01, + "cpu_time": 2.6168025476279372e-01, "time_unit": "ms" }, { @@ -339,9 +339,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5118, - 
"real_time": 1.3672088059236781e-01, - "cpu_time": 1.3671753341148860e-01, + "iterations": 4887, + "real_time": 1.4247795738811894e-01, + "cpu_time": 1.4247083793738480e-01, "time_unit": "ms" } ] diff --git a/test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log b/test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log index ff32d637..b36785c7 100644 --- a/test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log +++ b/test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log @@ -1,4 +1,4 @@ -2025-06-01T10:00:56+00:00 +2025-09-07T13:04:08+00:00 Running ./bin/image-processing-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,33 +6,33 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.23, 1.63, 2.49 +Load Average: 4.30, 5.58, 6.80 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
-------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------------------- -Eigen_Convolve2D/1 4.87 ms 4.87 ms 144 -MLIR_Conv2D/1 7.19 ms 7.18 ms 97 -Buddy_Conv2D/1 0.526 ms 0.526 ms 1322 -Buddy_Corr2D_Constant_Padding/1 0.793 ms 0.793 ms 885 -OpenCV_Filter2D_Constant_Padding/1 1.25 ms 1.25 ms 560 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.143 ms 0.143 ms 4865 -Buddy_Resize2D_Bilinear_Interpolation/1 0.260 ms 0.260 ms 2697 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 105155 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 49883 -Buddy_Erosion2D_Constant_Padding/1 0.214 ms 0.214 ms 3259 -Buddy_Dilation2D_Constant_Padding/1 0.213 ms 0.213 ms 3259 -Buddy_Opening2D_Constant_Padding/1 0.315 ms 0.315 ms 2258 -Buddy_Closing2D_Constant_Padding/1 0.312 ms 0.312 ms 2255 -Buddy_TopHat2D_Constant_Padding/1 0.783 ms 0.783 ms 854 -Buddy_BottomHat2D_Constant_Padding/1 0.785 ms 0.785 ms 821 -OpenCV_Erode2D_Constant_Padding/1 0.137 ms 0.137 ms 5119 -OpenCV_Opening2D_Constant_Padding/1 0.221 ms 0.221 ms 3167 -OpenCV_Closing2D_Constant_Padding/1 0.223 ms 0.223 ms 3139 -OpenCV_TopHat2D_Constant_Padding/1 0.258 ms 0.258 ms 2712 -OpenCV_BottomHat2D_Constant_Padding/1 0.256 ms 0.256 ms 2733 -OpenCV_MorphGrad2D_Constant_Padding/1 0.249 ms 0.249 ms 2804 -OpenCV_Dilate2D_Constant_Padding/1 0.137 ms 0.137 ms 5118 +Eigen_Convolve2D/1 5.11 ms 5.10 ms 138 +MLIR_Conv2D/1 7.68 ms 7.67 ms 91 +Buddy_Conv2D/1 0.540 ms 0.540 ms 1299 +Buddy_Corr2D_Constant_Padding/1 0.869 ms 0.869 ms 808 +OpenCV_Filter2D_Constant_Padding/1 1.35 ms 1.35 ms 521 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.151 ms 0.151 ms 4586 +Buddy_Resize2D_Bilinear_Interpolation/1 0.276 ms 0.276 ms 2536 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 100865 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47835 
+Buddy_Erosion2D_Constant_Padding/1 0.272 ms 0.272 ms 2523 +Buddy_Dilation2D_Constant_Padding/1 0.277 ms 0.277 ms 2535 +Buddy_Opening2D_Constant_Padding/1 0.473 ms 0.473 ms 1341 +Buddy_Closing2D_Constant_Padding/1 0.492 ms 0.492 ms 1364 +Buddy_TopHat2D_Constant_Padding/1 1.11 ms 1.11 ms 579 +Buddy_BottomHat2D_Constant_Padding/1 1.10 ms 1.10 ms 687 +OpenCV_Erode2D_Constant_Padding/1 0.142 ms 0.142 ms 4899 +OpenCV_Opening2D_Constant_Padding/1 0.218 ms 0.218 ms 3199 +OpenCV_Closing2D_Constant_Padding/1 0.223 ms 0.223 ms 3190 +OpenCV_TopHat2D_Constant_Padding/1 0.262 ms 0.262 ms 2723 +OpenCV_BottomHat2D_Constant_Padding/1 0.262 ms 0.262 ms 2666 +OpenCV_MorphGrad2D_Constant_Padding/1 0.262 ms 0.262 ms 2677 +OpenCV_Dilate2D_Constant_Padding/1 0.142 ms 0.142 ms 4887 Saved PNG file. Saved PNG file. Saved PNG file. diff --git a/test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json index 0c93522f..61b75d6a 100644 --- a/test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json +++ b/test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-06-01T10:01:20+00:00", + "date": "2025-09-07T13:04:32+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.14795,1.57764,2.44971], + "load_avg": [3.85547,5.37354,6.69824], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 61, - "real_time": 1.1490864648682173e+01, - "cpu_time": 1.1490722819672131e+01, + "iterations": 57, + "real_time": 1.2151056606518594e+01, + "cpu_time": 1.2150699157894737e+01, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, 
"repetition_index": 0, "threads": 1, - "iterations": 24, - "real_time": 2.8909380702922743e+01, - "cpu_time": 2.8909021499999994e+01, + "iterations": 23, + "real_time": 3.0677807233903717e+01, + "cpu_time": 3.0676721304347819e+01, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 231, - "real_time": 3.0301709405400534e+00, - "cpu_time": 3.0300729220779217e+00, + "iterations": 209, + "real_time": 3.3309125836100875e+00, + "cpu_time": 3.3308292583732060e+00, "time_unit": "ms" }, { @@ -87,9 +87,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 303, - "real_time": 2.3071401731015824e+00, - "cpu_time": 2.3070868151815174e+00, + "iterations": 287, + "real_time": 2.4366429146989299e+00, + "cpu_time": 2.4365807421602783e+00, "time_unit": "ms" }, { @@ -101,9 +101,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 171, - "real_time": 4.1065351351311330e+00, - "cpu_time": 4.1064032222222222e+00, + "iterations": 161, + "real_time": 4.3121754475261858e+00, + "cpu_time": 4.3120131366459598e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4848, - "real_time": 1.4249105528980591e-01, - "cpu_time": 1.4248396947194716e-01, + "iterations": 4635, + "real_time": 1.4946488854440157e-01, + "cpu_time": 1.4945905307443369e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2686, - "real_time": 2.6072073888729869e-01, - "cpu_time": 2.6071289240506312e-01, + "iterations": 2563, + "real_time": 2.7466075690379610e-01, + "cpu_time": 2.7464417362465848e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 105211, - "real_time": 6.6560155957683153e-03, - "cpu_time": 6.6558318331733325e-03, + "iterations": 102703, + "real_time": 6.8518726575323881e-03, + "cpu_time": 
6.8517319455127989e-03, "time_unit": "ms" }, { @@ -157,9 +157,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 49990, - "real_time": 1.4024660934696533e-02, - "cpu_time": 1.4024203120624117e-02, + "iterations": 48620, + "real_time": 1.4454776789734071e-02, + "cpu_time": 1.4454200761003693e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3269, - "real_time": 2.1371879390107268e-01, - "cpu_time": 2.1370936004894461e-01, + "iterations": 2882, + "real_time": 2.4118318528164434e-01, + "cpu_time": 2.4117433102012481e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3239, - "real_time": 2.1510430982301323e-01, - "cpu_time": 2.1509574992281527e-01, + "iterations": 2898, + "real_time": 2.3979534124447774e-01, + "cpu_time": 2.3978525431331929e-01, "time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2276, - "real_time": 3.1433540915248681e-01, - "cpu_time": 3.1432626493848848e-01, + "iterations": 1888, + "real_time": 3.8621500647484752e-01, + "cpu_time": 3.8620675741525484e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2257, - "real_time": 3.0752330173366516e-01, - "cpu_time": 3.0751668187859921e-01, + "iterations": 1755, + "real_time": 4.4991118870569430e-01, + "cpu_time": 4.4989996011396011e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 840, - "real_time": 8.0359440429934437e-01, - "cpu_time": 8.0356525238095189e-01, + "iterations": 660, + "real_time": 1.0221678989403176e+00, + "cpu_time": 1.0221497363636358e+00, "time_unit": "ms" }, { @@ -241,9 +241,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 836, - "real_time": 7.8342987545298048e-01, - "cpu_time": 7.8342100717703167e-01, + 
"iterations": 688, + "real_time": 1.0242091686746408e+00, + "cpu_time": 1.0241701438953492e+00, "time_unit": "ms" }, { @@ -255,9 +255,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5131, - "real_time": 1.3636051011210970e-01, - "cpu_time": 1.3635655388813078e-01, + "iterations": 4811, + "real_time": 1.4358576572231285e-01, + "cpu_time": 1.4358440968613598e-01, "time_unit": "ms" }, { @@ -269,9 +269,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3120, - "real_time": 2.2432351461014685e-01, - "cpu_time": 2.2431598076923051e-01, + "iterations": 3091, + "real_time": 2.2554094835378252e-01, + "cpu_time": 2.2553393950177869e-01, "time_unit": "ms" }, { @@ -283,9 +283,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3078, - "real_time": 2.2727604580243119e-01, - "cpu_time": 2.2726826185834917e-01, + "iterations": 3116, + "real_time": 2.2321108690390079e-01, + "cpu_time": 2.2319902118100216e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2688, - "real_time": 2.6022881640875267e-01, - "cpu_time": 2.6022399479166697e-01, + "iterations": 2685, + "real_time": 2.6148691551423381e-01, + "cpu_time": 2.6147274823091216e-01, "time_unit": "ms" }, { @@ -311,9 +311,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2702, - "real_time": 2.5923438659758147e-01, - "cpu_time": 2.5922455366395236e-01, + "iterations": 2680, + "real_time": 2.5973816527359522e-01, + "cpu_time": 2.5973222238806032e-01, "time_unit": "ms" }, { @@ -325,9 +325,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2764, - "real_time": 2.5320007343130935e-01, - "cpu_time": 2.5319149674384939e-01, + "iterations": 2641, + "real_time": 2.6617153057181803e-01, + "cpu_time": 2.6616062021961406e-01, "time_unit": "ms" }, { @@ -339,9 +339,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5022, - 
"real_time": 1.3919629242637632e-01, - "cpu_time": 1.3919045081640841e-01, + "iterations": 4895, + "real_time": 1.4290322641917219e-01, + "cpu_time": 1.4289656996935668e-01, "time_unit": "ms" } ] diff --git a/test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log b/test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log index a5546517..51fd917b 100644 --- a/test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log +++ b/test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log @@ -1,4 +1,4 @@ -2025-06-01T10:01:20+00:00 +2025-09-07T13:04:32+00:00 Running ./bin/image-processing-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,33 +6,33 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.15, 1.58, 2.45 +Load Average: 3.86, 5.37, 6.70 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
-------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------------------- -Eigen_Convolve2D/1 11.5 ms 11.5 ms 61 -MLIR_Conv2D/1 28.9 ms 28.9 ms 24 -Buddy_Conv2D/1 3.03 ms 3.03 ms 231 -Buddy_Corr2D_Constant_Padding/1 2.31 ms 2.31 ms 303 -OpenCV_Filter2D_Constant_Padding/1 4.11 ms 4.11 ms 171 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.142 ms 0.142 ms 4848 -Buddy_Resize2D_Bilinear_Interpolation/1 0.261 ms 0.261 ms 2686 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 105211 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 49990 -Buddy_Erosion2D_Constant_Padding/1 0.214 ms 0.214 ms 3269 -Buddy_Dilation2D_Constant_Padding/1 0.215 ms 0.215 ms 3239 -Buddy_Opening2D_Constant_Padding/1 0.314 ms 0.314 ms 2276 -Buddy_Closing2D_Constant_Padding/1 0.308 ms 0.308 ms 2257 -Buddy_TopHat2D_Constant_Padding/1 0.804 ms 0.804 ms 840 -Buddy_BottomHat2D_Constant_Padding/1 0.783 ms 0.783 ms 836 -OpenCV_Erode2D_Constant_Padding/1 0.136 ms 0.136 ms 5131 -OpenCV_Opening2D_Constant_Padding/1 0.224 ms 0.224 ms 3120 -OpenCV_Closing2D_Constant_Padding/1 0.227 ms 0.227 ms 3078 -OpenCV_TopHat2D_Constant_Padding/1 0.260 ms 0.260 ms 2688 -OpenCV_BottomHat2D_Constant_Padding/1 0.259 ms 0.259 ms 2702 -OpenCV_MorphGrad2D_Constant_Padding/1 0.253 ms 0.253 ms 2764 -OpenCV_Dilate2D_Constant_Padding/1 0.139 ms 0.139 ms 5022 +Eigen_Convolve2D/1 12.2 ms 12.2 ms 57 +MLIR_Conv2D/1 30.7 ms 30.7 ms 23 +Buddy_Conv2D/1 3.33 ms 3.33 ms 209 +Buddy_Corr2D_Constant_Padding/1 2.44 ms 2.44 ms 287 +OpenCV_Filter2D_Constant_Padding/1 4.31 ms 4.31 ms 161 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.149 ms 0.149 ms 4635 +Buddy_Resize2D_Bilinear_Interpolation/1 0.275 ms 0.275 ms 2563 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 102703 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 48620 
+Buddy_Erosion2D_Constant_Padding/1 0.241 ms 0.241 ms 2882 +Buddy_Dilation2D_Constant_Padding/1 0.240 ms 0.240 ms 2898 +Buddy_Opening2D_Constant_Padding/1 0.386 ms 0.386 ms 1888 +Buddy_Closing2D_Constant_Padding/1 0.450 ms 0.450 ms 1755 +Buddy_TopHat2D_Constant_Padding/1 1.02 ms 1.02 ms 660 +Buddy_BottomHat2D_Constant_Padding/1 1.02 ms 1.02 ms 688 +OpenCV_Erode2D_Constant_Padding/1 0.144 ms 0.144 ms 4811 +OpenCV_Opening2D_Constant_Padding/1 0.226 ms 0.226 ms 3091 +OpenCV_Closing2D_Constant_Padding/1 0.223 ms 0.223 ms 3116 +OpenCV_TopHat2D_Constant_Padding/1 0.261 ms 0.261 ms 2685 +OpenCV_BottomHat2D_Constant_Padding/1 0.260 ms 0.260 ms 2680 +OpenCV_MorphGrad2D_Constant_Padding/1 0.266 ms 0.266 ms 2641 +OpenCV_Dilate2D_Constant_Padding/1 0.143 ms 0.143 ms 4895 Saved PNG file. Saved PNG file. Saved PNG file. diff --git a/test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json index 5ce9a83a..8bae8bbe 100644 --- a/test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json +++ b/test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-06-01T10:01:44+00:00", + "date": "2025-09-07T13:04:57+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.09619,1.53027,2.41064], + "load_avg": [3.30176,5.11816,6.57764], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 61, - "real_time": 1.1557955600199152e+01, - "cpu_time": 1.1557535721311476e+01, + "iterations": 58, + "real_time": 1.2099689322298971e+01, + "cpu_time": 1.2099262362068966e+01, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, 
"repetition_index": 0, "threads": 1, - "iterations": 24, - "real_time": 2.8690188502271969e+01, - "cpu_time": 2.8689408208333333e+01, + "iterations": 23, + "real_time": 3.0390074881522551e+01, + "cpu_time": 3.0389264652173903e+01, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 231, - "real_time": 3.0273355350091862e+00, - "cpu_time": 3.0272740259740258e+00, + "iterations": 213, + "real_time": 3.2895873410041343e+00, + "cpu_time": 3.2894827981220649e+00, "time_unit": "ms" }, { @@ -87,9 +87,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 302, - "real_time": 2.3065690901007083e+00, - "cpu_time": 2.3065092152317894e+00, + "iterations": 288, + "real_time": 2.4207590354813471e+00, + "cpu_time": 2.4206542881944442e+00, "time_unit": "ms" }, { @@ -101,9 +101,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 170, - "real_time": 4.1036540971082802e+00, - "cpu_time": 4.1035620235294115e+00, + "iterations": 162, + "real_time": 4.3342783817170583e+00, + "cpu_time": 4.3339787654320956e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4835, - "real_time": 1.4248023227612261e-01, - "cpu_time": 1.4247486577042395e-01, + "iterations": 4631, + "real_time": 1.4959857160600776e-01, + "cpu_time": 1.4958810192183106e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2693, - "real_time": 2.5990511426640583e-01, - "cpu_time": 2.5989337541774982e-01, + "iterations": 2549, + "real_time": 2.7472342712685566e-01, + "cpu_time": 2.7470473715182425e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 104962, - "real_time": 6.6757263726710285e-03, - "cpu_time": 6.6756417274823291e-03, + "iterations": 102077, + "real_time": 6.8577059577568673e-03, + "cpu_time": 
6.8574325166296029e-03, "time_unit": "ms" }, { @@ -157,9 +157,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 49843, - "real_time": 1.4048672123858236e-02, - "cpu_time": 1.4048335934835362e-02, + "iterations": 48376, + "real_time": 1.4470884157829843e-02, + "cpu_time": 1.4470130291880297e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3259, - "real_time": 2.1453584358896277e-01, - "cpu_time": 2.1452855630561513e-01, + "iterations": 2942, + "real_time": 2.3797876818864058e-01, + "cpu_time": 2.3796595377294330e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3254, - "real_time": 2.1427505911220857e-01, - "cpu_time": 2.1427030762138938e-01, + "iterations": 2939, + "real_time": 2.4021909594049093e-01, + "cpu_time": 2.4021315787682859e-01, "time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2262, - "real_time": 3.0703992178543388e-01, - "cpu_time": 3.0703200707338635e-01, + "iterations": 1345, + "real_time": 4.9373165991669693e-01, + "cpu_time": 4.9368963048327069e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2241, - "real_time": 3.1868767929178221e-01, - "cpu_time": 3.1867821329763490e-01, + "iterations": 1304, + "real_time": 4.6933880875721301e-01, + "cpu_time": 4.6931679907975549e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 851, - "real_time": 7.8355771140332231e-01, - "cpu_time": 7.8351834195064463e-01, + "iterations": 602, + "real_time": 1.1125180523755938e+00, + "cpu_time": 1.1124859534883720e+00, "time_unit": "ms" }, { @@ -241,9 +241,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 840, - "real_time": 7.7708095666908084e-01, - "cpu_time": 7.7707133690476238e-01, + 
"iterations": 575, + "real_time": 1.1323607032713683e+00, + "cpu_time": 1.1323044973913052e+00, "time_unit": "ms" }, { @@ -255,9 +255,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5192, - "real_time": 1.3475235621911261e-01, - "cpu_time": 1.3474997862095520e-01, + "iterations": 4821, + "real_time": 1.4600104651765639e-01, + "cpu_time": 1.4599840883634121e-01, "time_unit": "ms" }, { @@ -269,9 +269,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3030, - "real_time": 2.3070538643956578e-01, - "cpu_time": 2.3069644389438895e-01, + "iterations": 2876, + "real_time": 2.3280639558906183e-01, + "cpu_time": 2.3279589916550708e-01, "time_unit": "ms" }, { @@ -283,9 +283,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3053, - "real_time": 2.2915481686435915e-01, - "cpu_time": 2.2914178480183464e-01, + "iterations": 3193, + "real_time": 2.3576448068624747e-01, + "cpu_time": 2.3575711556529910e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2609, - "real_time": 2.6790612302620598e-01, - "cpu_time": 2.6789994863932470e-01, + "iterations": 2578, + "real_time": 2.7489120428232927e-01, + "cpu_time": 2.7488053335919271e-01, "time_unit": "ms" }, { @@ -311,9 +311,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2624, - "real_time": 2.6675567340401068e-01, - "cpu_time": 2.6674890320121936e-01, + "iterations": 2558, + "real_time": 2.6976516830026404e-01, + "cpu_time": 2.6975646051602908e-01, "time_unit": "ms" }, { @@ -325,9 +325,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2714, - "real_time": 2.5794556255980106e-01, - "cpu_time": 2.5793842372881365e-01, + "iterations": 2613, + "real_time": 2.6876007346097786e-01, + "cpu_time": 2.6874270340604567e-01, "time_unit": "ms" }, { @@ -339,9 +339,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5119, - 
"real_time": 1.3671573276956650e-01, - "cpu_time": 1.3671077788630612e-01, + "iterations": 4816, + "real_time": 1.5157200256826672e-01, + "cpu_time": 1.5156344622092988e-01, "time_unit": "ms" } ] diff --git a/test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log b/test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log index 7365203e..92457329 100644 --- a/test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log +++ b/test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log @@ -1,4 +1,4 @@ -2025-06-01T10:01:44+00:00 +2025-09-07T13:04:57+00:00 Running ./bin/image-processing-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,33 +6,33 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.10, 1.53, 2.41 +Load Average: 3.30, 5.12, 6.58 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
-------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------------------- -Eigen_Convolve2D/1 11.6 ms 11.6 ms 61 -MLIR_Conv2D/1 28.7 ms 28.7 ms 24 -Buddy_Conv2D/1 3.03 ms 3.03 ms 231 -Buddy_Corr2D_Constant_Padding/1 2.31 ms 2.31 ms 302 -OpenCV_Filter2D_Constant_Padding/1 4.10 ms 4.10 ms 170 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.142 ms 0.142 ms 4835 -Buddy_Resize2D_Bilinear_Interpolation/1 0.260 ms 0.260 ms 2693 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 104962 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 49843 -Buddy_Erosion2D_Constant_Padding/1 0.215 ms 0.215 ms 3259 -Buddy_Dilation2D_Constant_Padding/1 0.214 ms 0.214 ms 3254 -Buddy_Opening2D_Constant_Padding/1 0.307 ms 0.307 ms 2262 -Buddy_Closing2D_Constant_Padding/1 0.319 ms 0.319 ms 2241 -Buddy_TopHat2D_Constant_Padding/1 0.784 ms 0.784 ms 851 -Buddy_BottomHat2D_Constant_Padding/1 0.777 ms 0.777 ms 840 -OpenCV_Erode2D_Constant_Padding/1 0.135 ms 0.135 ms 5192 -OpenCV_Opening2D_Constant_Padding/1 0.231 ms 0.231 ms 3030 -OpenCV_Closing2D_Constant_Padding/1 0.229 ms 0.229 ms 3053 -OpenCV_TopHat2D_Constant_Padding/1 0.268 ms 0.268 ms 2609 -OpenCV_BottomHat2D_Constant_Padding/1 0.267 ms 0.267 ms 2624 -OpenCV_MorphGrad2D_Constant_Padding/1 0.258 ms 0.258 ms 2714 -OpenCV_Dilate2D_Constant_Padding/1 0.137 ms 0.137 ms 5119 +Eigen_Convolve2D/1 12.1 ms 12.1 ms 58 +MLIR_Conv2D/1 30.4 ms 30.4 ms 23 +Buddy_Conv2D/1 3.29 ms 3.29 ms 213 +Buddy_Corr2D_Constant_Padding/1 2.42 ms 2.42 ms 288 +OpenCV_Filter2D_Constant_Padding/1 4.33 ms 4.33 ms 162 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.150 ms 0.150 ms 4631 +Buddy_Resize2D_Bilinear_Interpolation/1 0.275 ms 0.275 ms 2549 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 102077 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 48376 
+Buddy_Erosion2D_Constant_Padding/1 0.238 ms 0.238 ms 2942 +Buddy_Dilation2D_Constant_Padding/1 0.240 ms 0.240 ms 2939 +Buddy_Opening2D_Constant_Padding/1 0.494 ms 0.494 ms 1345 +Buddy_Closing2D_Constant_Padding/1 0.469 ms 0.469 ms 1304 +Buddy_TopHat2D_Constant_Padding/1 1.11 ms 1.11 ms 602 +Buddy_BottomHat2D_Constant_Padding/1 1.13 ms 1.13 ms 575 +OpenCV_Erode2D_Constant_Padding/1 0.146 ms 0.146 ms 4821 +OpenCV_Opening2D_Constant_Padding/1 0.233 ms 0.233 ms 2876 +OpenCV_Closing2D_Constant_Padding/1 0.236 ms 0.236 ms 3193 +OpenCV_TopHat2D_Constant_Padding/1 0.275 ms 0.275 ms 2578 +OpenCV_BottomHat2D_Constant_Padding/1 0.270 ms 0.270 ms 2558 +OpenCV_MorphGrad2D_Constant_Padding/1 0.269 ms 0.269 ms 2613 +OpenCV_Dilate2D_Constant_Padding/1 0.152 ms 0.152 ms 4816 Saved PNG file. Saved PNG file. Saved PNG file. diff --git a/test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json index ddaa93a1..740cc205 100644 --- a/test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json +++ b/test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-06-01T10:02:08+00:00", + "date": "2025-09-07T13:05:21+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.06201,1.48633,2.37158], + "load_avg": [3.19775,4.94678,6.48145], "library_build_type": "release" }, "benchmarks": [ @@ -46,8 +46,8 @@ "repetition_index": 0, "threads": 1, "iterations": 31, - "real_time": 2.1583555506602412e+01, - "cpu_time": 2.1583274354838714e+01, + "real_time": 2.2834600099632816e+01, + "cpu_time": 2.2833829258064519e+01, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - 
"iterations": 11, - "real_time": 6.6695000637661323e+01, - "cpu_time": 6.6693548545454576e+01, + "iterations": 10, + "real_time": 7.0026343688368797e+01, + "cpu_time": 7.0024338400000005e+01, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 114, - "real_time": 6.1205455118365455e+00, - "cpu_time": 6.1202609210526315e+00, + "iterations": 104, + "real_time": 6.7405158367294531e+00, + "cpu_time": 6.7402173653846171e+00, "time_unit": "ms" }, { @@ -87,9 +87,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 151, - "real_time": 4.6537321504970262e+00, - "cpu_time": 4.6535720132450340e+00, + "iterations": 137, + "real_time": 5.0721328134519341e+00, + "cpu_time": 5.0720212627737213e+00, "time_unit": "ms" }, { @@ -101,9 +101,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 81, - "real_time": 8.5981621142522791e+00, - "cpu_time": 8.5979825679012354e+00, + "iterations": 77, + "real_time": 9.1429674489931632e+00, + "cpu_time": 9.1425494025974068e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4847, - "real_time": 1.4300990679877945e-01, - "cpu_time": 1.4300421848566114e-01, + "iterations": 4577, + "real_time": 1.5241156727147201e-01, + "cpu_time": 1.5240754642779097e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2686, - "real_time": 2.6083683521554085e-01, - "cpu_time": 2.6083052643335813e-01, + "iterations": 2532, + "real_time": 2.7672113412270238e-01, + "cpu_time": 2.7670993601895744e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 105138, - "real_time": 6.6629409044258181e-03, - "cpu_time": 6.6626790979474621e-03, + "iterations": 100828, + "real_time": 6.9398609502726869e-03, + "cpu_time": 6.9397027016304957e-03, "time_unit": "ms" }, { @@ -157,9 +157,9 
@@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 49889, - "real_time": 1.4021988875160225e-02, - "cpu_time": 1.4021621239150914e-02, + "iterations": 47890, + "real_time": 1.4613397414785359e-02, + "cpu_time": 1.4612937857590293e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3257, - "real_time": 2.1324698299416597e-01, - "cpu_time": 2.1323381915873477e-01, + "iterations": 2591, + "real_time": 2.6190706497451505e-01, + "cpu_time": 2.6189954303357776e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3242, - "real_time": 2.1325541403946649e-01, - "cpu_time": 2.1324987754472560e-01, + "iterations": 2615, + "real_time": 2.7797788694985281e-01, + "cpu_time": 2.7796140305927314e-01, "time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2237, - "real_time": 3.1306709849413450e-01, - "cpu_time": 3.1305870138578440e-01, + "iterations": 1542, + "real_time": 4.6680531139580966e-01, + "cpu_time": 4.6677065175097338e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2232, - "real_time": 3.1772318903663893e-01, - "cpu_time": 3.1771430913978527e-01, + "iterations": 1395, + "real_time": 4.5463244387325846e-01, + "cpu_time": 4.5455844802867373e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 853, - "real_time": 7.7462966301656411e-01, - "cpu_time": 7.7457911137162871e-01, + "iterations": 630, + "real_time": 1.0993280640197178e+00, + "cpu_time": 1.0992607015872999e+00, "time_unit": "ms" }, { @@ -241,9 +241,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 846, - "real_time": 7.8768119761988908e-01, - "cpu_time": 7.8767234278959741e-01, + "iterations": 617, + "real_time": 1.1069473827097751e+00, + 
"cpu_time": 1.1068412495948152e+00, "time_unit": "ms" }, { @@ -255,9 +255,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5075, - "real_time": 1.3787841451872745e-01, - "cpu_time": 1.3787315527093585e-01, + "iterations": 4809, + "real_time": 1.4524754655041400e-01, + "cpu_time": 1.4524285527136649e-01, "time_unit": "ms" }, { @@ -269,9 +269,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3015, - "real_time": 2.3217374029543073e-01, - "cpu_time": 2.3216454626865654e-01, + "iterations": 3025, + "real_time": 2.3007626006425905e-01, + "cpu_time": 2.3006703537190112e-01, "time_unit": "ms" }, { @@ -283,9 +283,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3114, - "real_time": 2.2473948060815427e-01, - "cpu_time": 2.2473018657675037e-01, + "iterations": 3022, + "real_time": 2.2336854805897435e-01, + "cpu_time": 2.2336302812706782e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2647, - "real_time": 2.6444323265219255e-01, - "cpu_time": 2.6443637363052508e-01, + "iterations": 2604, + "real_time": 2.6297588301922686e-01, + "cpu_time": 2.6296620890936956e-01, "time_unit": "ms" }, { @@ -311,9 +311,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2672, - "real_time": 2.6206671633941686e-01, - "cpu_time": 2.6205630389221563e-01, + "iterations": 2702, + "real_time": 2.5860768371295434e-01, + "cpu_time": 2.5860224167283508e-01, "time_unit": "ms" }, { @@ -325,9 +325,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2749, - "real_time": 2.5497903798701071e-01, - "cpu_time": 2.5497087522735551e-01, + "iterations": 2611, + "real_time": 2.7249755900694189e-01, + "cpu_time": 2.7247722826503162e-01, "time_unit": "ms" }, { @@ -339,9 +339,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5111, - "real_time": 1.3701243949622191e-01, - "cpu_time": 
1.3700826589708445e-01, + "iterations": 4746, + "real_time": 1.4528973452712332e-01, + "cpu_time": 1.4528467951959531e-01, "time_unit": "ms" } ] diff --git a/test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log b/test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log index 18a4d5c0..b0234846 100644 --- a/test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log +++ b/test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log @@ -1,4 +1,4 @@ -2025-06-01T10:02:08+00:00 +2025-09-07T13:05:21+00:00 Running ./bin/image-processing-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,33 +6,33 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.06, 1.49, 2.37 +Load Average: 3.20, 4.95, 6.48 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
-------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------------------- -Eigen_Convolve2D/1 21.6 ms 21.6 ms 31 -MLIR_Conv2D/1 66.7 ms 66.7 ms 11 -Buddy_Conv2D/1 6.12 ms 6.12 ms 114 -Buddy_Corr2D_Constant_Padding/1 4.65 ms 4.65 ms 151 -OpenCV_Filter2D_Constant_Padding/1 8.60 ms 8.60 ms 81 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.143 ms 0.143 ms 4847 -Buddy_Resize2D_Bilinear_Interpolation/1 0.261 ms 0.261 ms 2686 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 105138 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 49889 -Buddy_Erosion2D_Constant_Padding/1 0.213 ms 0.213 ms 3257 -Buddy_Dilation2D_Constant_Padding/1 0.213 ms 0.213 ms 3242 -Buddy_Opening2D_Constant_Padding/1 0.313 ms 0.313 ms 2237 -Buddy_Closing2D_Constant_Padding/1 0.318 ms 0.318 ms 2232 -Buddy_TopHat2D_Constant_Padding/1 0.775 ms 0.775 ms 853 -Buddy_BottomHat2D_Constant_Padding/1 0.788 ms 0.788 ms 846 -OpenCV_Erode2D_Constant_Padding/1 0.138 ms 0.138 ms 5075 -OpenCV_Opening2D_Constant_Padding/1 0.232 ms 0.232 ms 3015 -OpenCV_Closing2D_Constant_Padding/1 0.225 ms 0.225 ms 3114 -OpenCV_TopHat2D_Constant_Padding/1 0.264 ms 0.264 ms 2647 -OpenCV_BottomHat2D_Constant_Padding/1 0.262 ms 0.262 ms 2672 -OpenCV_MorphGrad2D_Constant_Padding/1 0.255 ms 0.255 ms 2749 -OpenCV_Dilate2D_Constant_Padding/1 0.137 ms 0.137 ms 5111 +Eigen_Convolve2D/1 22.8 ms 22.8 ms 31 +MLIR_Conv2D/1 70.0 ms 70.0 ms 10 +Buddy_Conv2D/1 6.74 ms 6.74 ms 104 +Buddy_Corr2D_Constant_Padding/1 5.07 ms 5.07 ms 137 +OpenCV_Filter2D_Constant_Padding/1 9.14 ms 9.14 ms 77 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.152 ms 0.152 ms 4577 +Buddy_Resize2D_Bilinear_Interpolation/1 0.277 ms 0.277 ms 2532 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 100828 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47890 
+Buddy_Erosion2D_Constant_Padding/1 0.262 ms 0.262 ms 2591 +Buddy_Dilation2D_Constant_Padding/1 0.278 ms 0.278 ms 2615 +Buddy_Opening2D_Constant_Padding/1 0.467 ms 0.467 ms 1542 +Buddy_Closing2D_Constant_Padding/1 0.455 ms 0.455 ms 1395 +Buddy_TopHat2D_Constant_Padding/1 1.10 ms 1.10 ms 630 +Buddy_BottomHat2D_Constant_Padding/1 1.11 ms 1.11 ms 617 +OpenCV_Erode2D_Constant_Padding/1 0.145 ms 0.145 ms 4809 +OpenCV_Opening2D_Constant_Padding/1 0.230 ms 0.230 ms 3025 +OpenCV_Closing2D_Constant_Padding/1 0.223 ms 0.223 ms 3022 +OpenCV_TopHat2D_Constant_Padding/1 0.263 ms 0.263 ms 2604 +OpenCV_BottomHat2D_Constant_Padding/1 0.259 ms 0.259 ms 2702 +OpenCV_MorphGrad2D_Constant_Padding/1 0.272 ms 0.272 ms 2611 +OpenCV_Dilate2D_Constant_Padding/1 0.145 ms 0.145 ms 4746 Saved PNG file. Saved PNG file. Saved PNG file. diff --git a/test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json index 5e6f1f5e..db6a9c70 100644 --- a/test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json +++ b/test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-06-01T10:02:32+00:00", + "date": "2025-09-07T13:05:44+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.03955,1.44629,2.33398], + "load_avg": [3.42627,4.88428,6.42773], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 32, - "real_time": 2.1539924258831888e+01, - "cpu_time": 2.1539544500000002e+01, + "iterations": 31, + "real_time": 2.2537152012509683e+01, + "cpu_time": 2.2535636096774191e+01, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, 
"repetition_index": 0, "threads": 1, - "iterations": 11, - "real_time": 6.6645104946060613e+01, - "cpu_time": 6.6641695999999982e+01, + "iterations": 10, + "real_time": 6.9293748959898949e+01, + "cpu_time": 6.9290900800000017e+01, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 114, - "real_time": 6.1256526397508484e+00, - "cpu_time": 6.1255026578947369e+00, + "iterations": 106, + "real_time": 6.7181873996302768e+00, + "cpu_time": 6.7177365471698129e+00, "time_unit": "ms" }, { @@ -87,9 +87,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 151, - "real_time": 4.6481645561211948e+00, - "cpu_time": 4.6480527218543033e+00, + "iterations": 145, + "real_time": 4.8399442999527373e+00, + "cpu_time": 4.8398396551724145e+00, "time_unit": "ms" }, { @@ -101,9 +101,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 81, - "real_time": 8.5967951396733149e+00, - "cpu_time": 8.5964210617283996e+00, + "iterations": 78, + "real_time": 9.0141488382449513e+00, + "cpu_time": 9.0134768974358952e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4857, - "real_time": 1.4274283807931035e-01, - "cpu_time": 1.4273323533045087e-01, + "iterations": 4580, + "real_time": 1.5031098018046549e-01, + "cpu_time": 1.5030253820960690e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2693, - "real_time": 2.6048750316083008e-01, - "cpu_time": 2.6047936019309320e-01, + "iterations": 2544, + "real_time": 2.9538726322821474e-01, + "cpu_time": 2.9536609433962269e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 105362, - "real_time": 6.6484719802228963e-03, - "cpu_time": 6.6481730130407476e-03, + "iterations": 103180, + "real_time": 6.8470672492163889e-03, + "cpu_time": 6.8469026070944036e-03, 
"time_unit": "ms" }, { @@ -157,9 +157,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 49959, - "real_time": 1.4013711762532320e-02, - "cpu_time": 1.4013170539842676e-02, + "iterations": 48350, + "real_time": 1.4463258982321099e-02, + "cpu_time": 1.4462680806618422e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3220, - "real_time": 2.1420254296860339e-01, - "cpu_time": 2.1418366801242208e-01, + "iterations": 2630, + "real_time": 2.6526380146184347e-01, + "cpu_time": 2.6525710494296539e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3263, - "real_time": 2.1453051982830537e-01, - "cpu_time": 2.1452533159669049e-01, + "iterations": 2560, + "real_time": 2.7432749484432861e-01, + "cpu_time": 2.7431482539062507e-01, "time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2246, - "real_time": 3.0990404044530906e-01, - "cpu_time": 3.0988853205699018e-01, + "iterations": 1570, + "real_time": 4.5010410961072156e-01, + "cpu_time": 4.5009547324840748e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2217, - "real_time": 3.1024105527619278e-01, - "cpu_time": 3.1023018267929692e-01, + "iterations": 1546, + "real_time": 4.4200593969491908e-01, + "cpu_time": 4.4199154010349267e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 828, - "real_time": 7.7828986948167067e-01, - "cpu_time": 7.7822905797101383e-01, + "iterations": 646, + "real_time": 1.0057703848501478e+00, + "cpu_time": 1.0057355030959745e+00, "time_unit": "ms" }, { @@ -241,9 +241,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 833, - "real_time": 7.9281703040164775e-01, - "cpu_time": 7.9278437575029825e-01, + "iterations": 660, + 
"real_time": 1.0330350335800287e+00, + "cpu_time": 1.0329946303030324e+00, "time_unit": "ms" }, { @@ -255,9 +255,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5129, - "real_time": 1.3618203899413667e-01, - "cpu_time": 1.3617622031585130e-01, + "iterations": 4837, + "real_time": 1.4295056154221722e-01, + "cpu_time": 1.4294818709944210e-01, "time_unit": "ms" }, { @@ -269,9 +269,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3091, - "real_time": 2.2648242960582463e-01, - "cpu_time": 2.2647447460368761e-01, + "iterations": 3029, + "real_time": 2.3049620383247837e-01, + "cpu_time": 2.3049112611422914e-01, "time_unit": "ms" }, { @@ -283,9 +283,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3096, - "real_time": 2.2607082164141251e-01, - "cpu_time": 2.2606457816537487e-01, + "iterations": 3041, + "real_time": 2.2969253922284963e-01, + "cpu_time": 2.2968828707661890e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2677, - "real_time": 2.6060663286122004e-01, - "cpu_time": 2.6059899327605501e-01, + "iterations": 2631, + "real_time": 2.6287287417041716e-01, + "cpu_time": 2.6285376358798923e-01, "time_unit": "ms" }, { @@ -311,9 +311,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2684, - "real_time": 2.6077424813111921e-01, - "cpu_time": 2.6076512742175839e-01, + "iterations": 2545, + "real_time": 2.6360817092575117e-01, + "cpu_time": 2.6360576895874238e-01, "time_unit": "ms" }, { @@ -325,9 +325,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2763, - "real_time": 2.5325366998663684e-01, - "cpu_time": 2.5324759645313089e-01, + "iterations": 2557, + "real_time": 2.7465159900262109e-01, + "cpu_time": 2.7463849980445809e-01, "time_unit": "ms" }, { @@ -339,9 +339,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5132, - "real_time": 
1.3630549929345018e-01, - "cpu_time": 1.3630290568979009e-01, + "iterations": 4852, + "real_time": 1.4438946414293580e-01, + "cpu_time": 1.4438811150041275e-01, "time_unit": "ms" } ] diff --git a/test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log b/test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log index 5610a081..458ce4ce 100644 --- a/test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log +++ b/test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log @@ -1,4 +1,4 @@ -2025-06-01T10:02:32+00:00 +2025-09-07T13:05:44+00:00 Running ./bin/image-processing-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,33 +6,33 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.04, 1.45, 2.33 +Load Average: 3.43, 4.88, 6.43 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
-------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------------------- -Eigen_Convolve2D/1 21.5 ms 21.5 ms 32 -MLIR_Conv2D/1 66.6 ms 66.6 ms 11 -Buddy_Conv2D/1 6.13 ms 6.13 ms 114 -Buddy_Corr2D_Constant_Padding/1 4.65 ms 4.65 ms 151 -OpenCV_Filter2D_Constant_Padding/1 8.60 ms 8.60 ms 81 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.143 ms 0.143 ms 4857 -Buddy_Resize2D_Bilinear_Interpolation/1 0.260 ms 0.260 ms 2693 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 105362 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 49959 -Buddy_Erosion2D_Constant_Padding/1 0.214 ms 0.214 ms 3220 -Buddy_Dilation2D_Constant_Padding/1 0.215 ms 0.215 ms 3263 -Buddy_Opening2D_Constant_Padding/1 0.310 ms 0.310 ms 2246 -Buddy_Closing2D_Constant_Padding/1 0.310 ms 0.310 ms 2217 -Buddy_TopHat2D_Constant_Padding/1 0.778 ms 0.778 ms 828 -Buddy_BottomHat2D_Constant_Padding/1 0.793 ms 0.793 ms 833 -OpenCV_Erode2D_Constant_Padding/1 0.136 ms 0.136 ms 5129 -OpenCV_Opening2D_Constant_Padding/1 0.226 ms 0.226 ms 3091 -OpenCV_Closing2D_Constant_Padding/1 0.226 ms 0.226 ms 3096 -OpenCV_TopHat2D_Constant_Padding/1 0.261 ms 0.261 ms 2677 -OpenCV_BottomHat2D_Constant_Padding/1 0.261 ms 0.261 ms 2684 -OpenCV_MorphGrad2D_Constant_Padding/1 0.253 ms 0.253 ms 2763 -OpenCV_Dilate2D_Constant_Padding/1 0.136 ms 0.136 ms 5132 +Eigen_Convolve2D/1 22.5 ms 22.5 ms 31 +MLIR_Conv2D/1 69.3 ms 69.3 ms 10 +Buddy_Conv2D/1 6.72 ms 6.72 ms 106 +Buddy_Corr2D_Constant_Padding/1 4.84 ms 4.84 ms 145 +OpenCV_Filter2D_Constant_Padding/1 9.01 ms 9.01 ms 78 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.150 ms 0.150 ms 4580 +Buddy_Resize2D_Bilinear_Interpolation/1 0.295 ms 0.295 ms 2544 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 103180 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 48350 
+Buddy_Erosion2D_Constant_Padding/1 0.265 ms 0.265 ms 2630 +Buddy_Dilation2D_Constant_Padding/1 0.274 ms 0.274 ms 2560 +Buddy_Opening2D_Constant_Padding/1 0.450 ms 0.450 ms 1570 +Buddy_Closing2D_Constant_Padding/1 0.442 ms 0.442 ms 1546 +Buddy_TopHat2D_Constant_Padding/1 1.01 ms 1.01 ms 646 +Buddy_BottomHat2D_Constant_Padding/1 1.03 ms 1.03 ms 660 +OpenCV_Erode2D_Constant_Padding/1 0.143 ms 0.143 ms 4837 +OpenCV_Opening2D_Constant_Padding/1 0.230 ms 0.230 ms 3029 +OpenCV_Closing2D_Constant_Padding/1 0.230 ms 0.230 ms 3041 +OpenCV_TopHat2D_Constant_Padding/1 0.263 ms 0.263 ms 2631 +OpenCV_BottomHat2D_Constant_Padding/1 0.264 ms 0.264 ms 2545 +OpenCV_MorphGrad2D_Constant_Padding/1 0.275 ms 0.275 ms 2557 +OpenCV_Dilate2D_Constant_Padding/1 0.144 ms 0.144 ms 4852 Saved PNG file. Saved PNG file. Saved PNG file. diff --git a/test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json index f7895fce..71e3e968 100644 --- a/test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json +++ b/test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-06-01T10:02:56+00:00", + "date": "2025-09-07T13:06:09+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.02783,1.4165,2.30469], + "load_avg": [3.2749,4.73193,6.33545], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 17, - "real_time": 4.1542540359146457e+01, - "cpu_time": 4.1541568352941177e+01, + "iterations": 19, + "real_time": 3.5934302563730036e+01, + "cpu_time": 3.5933023947368419e+01, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, 
"repetition_index": 0, "threads": 1, - "iterations": 5, - "real_time": 1.4357046708464622e+02, - "cpu_time": 1.4356582000000003e+02, + "iterations": 6, + "real_time": 1.2292776505152385e+02, + "cpu_time": 1.2292239783333336e+02, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 67, - "real_time": 1.0501373142226418e+01, - "cpu_time": 1.0501049492537312e+01, + "iterations": 62, + "real_time": 1.1396257747565546e+01, + "cpu_time": 1.1395874725806449e+01, "time_unit": "ms" }, { @@ -87,9 +87,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 90, - "real_time": 7.9487750720646648e+00, - "cpu_time": 7.9483699222222235e+00, + "iterations": 85, + "real_time": 8.2714001045507537e+00, + "cpu_time": 8.2711988235294083e+00, "time_unit": "ms" }, { @@ -101,9 +101,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 120, - "real_time": 5.8866093711306648e+00, - "cpu_time": 5.8862758833333402e+00, + "iterations": 115, + "real_time": 6.0265639877837636e+00, + "cpu_time": 6.0262306782608634e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4856, - "real_time": 1.4254383748946511e-01, - "cpu_time": 1.4253524814662261e-01, + "iterations": 4572, + "real_time": 1.5129561903491318e-01, + "cpu_time": 1.5129300874890639e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2692, - "real_time": 2.6072924098700151e-01, - "cpu_time": 2.6071364598811275e-01, + "iterations": 2518, + "real_time": 2.7770467096609763e-01, + "cpu_time": 2.7769441262907074e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 105165, - "real_time": 6.6590586224041202e-03, - "cpu_time": 6.6586980934721625e-03, + "iterations": 100684, + "real_time": 6.9422946241566785e-03, + "cpu_time": 6.9421268324659320e-03, 
"time_unit": "ms" }, { @@ -157,9 +157,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 49405, - "real_time": 1.4179655594956524e-02, - "cpu_time": 1.4179255581418873e-02, + "iterations": 48008, + "real_time": 1.4623689299632144e-02, + "cpu_time": 1.4622668409431758e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3218, - "real_time": 2.1431234713257136e-01, - "cpu_time": 2.1430480049720282e-01, + "iterations": 2923, + "real_time": 2.4639664194251917e-01, + "cpu_time": 2.4638985083818007e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3226, - "real_time": 2.1530301595927762e-01, - "cpu_time": 2.1529161376317418e-01, + "iterations": 2623, + "real_time": 2.4554658904131979e-01, + "cpu_time": 2.4553771406786179e-01, "time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2175, - "real_time": 3.1200053020455371e-01, - "cpu_time": 3.1198446252873530e-01, + "iterations": 1783, + "real_time": 4.1326654179672545e-01, + "cpu_time": 4.1323844083006184e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2264, - "real_time": 3.1231050609936772e-01, - "cpu_time": 3.1229766696113065e-01, + "iterations": 1718, + "real_time": 4.1503581512238008e-01, + "cpu_time": 4.1502305005820794e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 843, - "real_time": 8.2120473230365898e-01, - "cpu_time": 8.2114858362989307e-01, + "iterations": 651, + "real_time": 1.0203976990989825e+00, + "cpu_time": 1.0203406251920124e+00, "time_unit": "ms" }, { @@ -241,9 +241,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 844, - "real_time": 8.1806024757182993e-01, - "cpu_time": 8.1802246800947831e-01, + "iterations": 631, + 
"real_time": 1.1531298245095225e+00, + "cpu_time": 1.1530850871632337e+00, "time_unit": "ms" }, { @@ -255,9 +255,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5142, - "real_time": 1.3589408018277435e-01, - "cpu_time": 1.3588349844418496e-01, + "iterations": 4907, + "real_time": 1.4306778562319164e-01, + "cpu_time": 1.4306101080089656e-01, "time_unit": "ms" }, { @@ -269,9 +269,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3174, - "real_time": 2.2059779499188564e-01, - "cpu_time": 2.2059140012602413e-01, + "iterations": 3140, + "real_time": 2.2325247858360314e-01, + "cpu_time": 2.2324565222929896e-01, "time_unit": "ms" }, { @@ -283,9 +283,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3164, - "real_time": 2.2124412249392264e-01, - "cpu_time": 2.2124253350189718e-01, + "iterations": 3122, + "real_time": 2.2345905423088000e-01, + "cpu_time": 2.2345698334401021e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2735, - "real_time": 2.5577826435648765e-01, - "cpu_time": 2.5577077038391222e-01, + "iterations": 2724, + "real_time": 2.6043242174535769e-01, + "cpu_time": 2.6042061894273144e-01, "time_unit": "ms" }, { @@ -311,9 +311,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2710, - "real_time": 2.5817889005385641e-01, - "cpu_time": 2.5817290959409672e-01, + "iterations": 2683, + "real_time": 2.5787628267933704e-01, + "cpu_time": 2.5786502795378330e-01, "time_unit": "ms" }, { @@ -325,9 +325,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2787, - "real_time": 2.5088470770032079e-01, - "cpu_time": 2.5087844312881219e-01, + "iterations": 2682, + "real_time": 2.6200129459546095e-01, + "cpu_time": 2.6198928560775564e-01, "time_unit": "ms" }, { @@ -339,9 +339,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5117, - "real_time": 
1.3666728594083211e-01, - "cpu_time": 1.3666393453195239e-01, + "iterations": 4938, + "real_time": 1.4114592551774657e-01, + "cpu_time": 1.4114345787768337e-01, "time_unit": "ms" } ] diff --git a/test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log b/test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log index f707d6a0..46a4f823 100644 --- a/test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log +++ b/test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log @@ -1,4 +1,4 @@ -2025-06-01T10:02:56+00:00 +2025-09-07T13:06:09+00:00 Running ./bin/image-processing-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,33 +6,33 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.03, 1.42, 2.30 +Load Average: 3.27, 4.73, 6.34 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
-------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------------------- -Eigen_Convolve2D/1 41.5 ms 41.5 ms 17 -MLIR_Conv2D/1 144 ms 144 ms 5 -Buddy_Conv2D/1 10.5 ms 10.5 ms 67 -Buddy_Corr2D_Constant_Padding/1 7.95 ms 7.95 ms 90 -OpenCV_Filter2D_Constant_Padding/1 5.89 ms 5.89 ms 120 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.143 ms 0.143 ms 4856 -Buddy_Resize2D_Bilinear_Interpolation/1 0.261 ms 0.261 ms 2692 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 105165 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 49405 -Buddy_Erosion2D_Constant_Padding/1 0.214 ms 0.214 ms 3218 -Buddy_Dilation2D_Constant_Padding/1 0.215 ms 0.215 ms 3226 -Buddy_Opening2D_Constant_Padding/1 0.312 ms 0.312 ms 2175 -Buddy_Closing2D_Constant_Padding/1 0.312 ms 0.312 ms 2264 -Buddy_TopHat2D_Constant_Padding/1 0.821 ms 0.821 ms 843 -Buddy_BottomHat2D_Constant_Padding/1 0.818 ms 0.818 ms 844 -OpenCV_Erode2D_Constant_Padding/1 0.136 ms 0.136 ms 5142 -OpenCV_Opening2D_Constant_Padding/1 0.221 ms 0.221 ms 3174 -OpenCV_Closing2D_Constant_Padding/1 0.221 ms 0.221 ms 3164 -OpenCV_TopHat2D_Constant_Padding/1 0.256 ms 0.256 ms 2735 -OpenCV_BottomHat2D_Constant_Padding/1 0.258 ms 0.258 ms 2710 -OpenCV_MorphGrad2D_Constant_Padding/1 0.251 ms 0.251 ms 2787 -OpenCV_Dilate2D_Constant_Padding/1 0.137 ms 0.137 ms 5117 +Eigen_Convolve2D/1 35.9 ms 35.9 ms 19 +MLIR_Conv2D/1 123 ms 123 ms 6 +Buddy_Conv2D/1 11.4 ms 11.4 ms 62 +Buddy_Corr2D_Constant_Padding/1 8.27 ms 8.27 ms 85 +OpenCV_Filter2D_Constant_Padding/1 6.03 ms 6.03 ms 115 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.151 ms 0.151 ms 4572 +Buddy_Resize2D_Bilinear_Interpolation/1 0.278 ms 0.278 ms 2518 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 100684 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 48008 
+Buddy_Erosion2D_Constant_Padding/1 0.246 ms 0.246 ms 2923 +Buddy_Dilation2D_Constant_Padding/1 0.246 ms 0.246 ms 2623 +Buddy_Opening2D_Constant_Padding/1 0.413 ms 0.413 ms 1783 +Buddy_Closing2D_Constant_Padding/1 0.415 ms 0.415 ms 1718 +Buddy_TopHat2D_Constant_Padding/1 1.02 ms 1.02 ms 651 +Buddy_BottomHat2D_Constant_Padding/1 1.15 ms 1.15 ms 631 +OpenCV_Erode2D_Constant_Padding/1 0.143 ms 0.143 ms 4907 +OpenCV_Opening2D_Constant_Padding/1 0.223 ms 0.223 ms 3140 +OpenCV_Closing2D_Constant_Padding/1 0.223 ms 0.223 ms 3122 +OpenCV_TopHat2D_Constant_Padding/1 0.260 ms 0.260 ms 2724 +OpenCV_BottomHat2D_Constant_Padding/1 0.258 ms 0.258 ms 2683 +OpenCV_MorphGrad2D_Constant_Padding/1 0.262 ms 0.262 ms 2682 +OpenCV_Dilate2D_Constant_Padding/1 0.141 ms 0.141 ms 4938 Saved PNG file. Saved PNG file. Saved PNG file. diff --git a/test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json index 7aeb0253..f2528e67 100644 --- a/test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json +++ b/test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-06-01T10:03:20+00:00", + "date": "2025-09-07T13:06:33+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1.01709,1.38184,2.26807], + "load_avg": [2.90137,4.52734,6.22461], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 20, - "real_time": 3.4217556379735470e+01, - "cpu_time": 3.4216377350000002e+01, + "iterations": 19, + "real_time": 3.6031454017287807e+01, + "cpu_time": 3.6030928210526312e+01, "time_unit": "ms" }, { @@ -60,8 +60,8 @@ "repetition_index": 
0, "threads": 1, "iterations": 6, - "real_time": 1.1868627804021041e+02, - "cpu_time": 1.1867920516666668e+02, + "real_time": 1.2390441261231899e+02, + "cpu_time": 1.2389855900000002e+02, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 67, - "real_time": 1.0498678089300199e+01, - "cpu_time": 1.0498225208955224e+01, + "iterations": 61, + "real_time": 1.1435016989707947e+01, + "cpu_time": 1.1434898901639347e+01, "time_unit": "ms" }, { @@ -87,9 +87,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 90, - "real_time": 7.8919309501846628e+00, - "cpu_time": 7.8916826222222225e+00, + "iterations": 86, + "real_time": 8.0684243679739716e+00, + "cpu_time": 8.0681411860465122e+00, "time_unit": "ms" }, { @@ -101,9 +101,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 119, - "real_time": 5.8879929305124685e+00, - "cpu_time": 5.8877909243697495e+00, + "iterations": 116, + "real_time": 6.0479930312982919e+00, + "cpu_time": 6.0477856465517252e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4857, - "real_time": 1.4250408969274980e-01, - "cpu_time": 1.4250040910026768e-01, + "iterations": 4547, + "real_time": 1.4929789562316578e-01, + "cpu_time": 1.4929170332087077e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2690, - "real_time": 2.6047558395614412e-01, - "cpu_time": 2.6046792565055787e-01, + "iterations": 2567, + "real_time": 2.7209066542429267e-01, + "cpu_time": 2.7208513245033111e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 105068, - "real_time": 6.6534977810603496e-03, - "cpu_time": 6.6533541230441263e-03, + "iterations": 102031, + "real_time": 6.8424128531881564e-03, + "cpu_time": 6.8420196802932457e-03, "time_unit": "ms" }, { @@ -157,9 +157,9 @@ 
"repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 49449, - "real_time": 1.4174878487268949e-02, - "cpu_time": 1.4174551719953883e-02, + "iterations": 47780, + "real_time": 1.4569356260987295e-02, + "cpu_time": 1.4568733444956036e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3244, - "real_time": 2.1404832539535626e-01, - "cpu_time": 2.1404216307028326e-01, + "iterations": 2938, + "real_time": 2.3473830547480456e-01, + "cpu_time": 2.3473132573179031e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3243, - "real_time": 2.2520982659491645e-01, - "cpu_time": 2.2519997533148287e-01, + "iterations": 2922, + "real_time": 2.3591566211266685e-01, + "cpu_time": 2.3590406913073245e-01, "time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2260, - "real_time": 3.0706161385880110e-01, - "cpu_time": 3.0704632876106208e-01, + "iterations": 1840, + "real_time": 3.6312726164317649e-01, + "cpu_time": 3.6310779728260872e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2223, - "real_time": 3.1307990965653365e-01, - "cpu_time": 3.1306463832658527e-01, + "iterations": 1905, + "real_time": 3.7938127015519330e-01, + "cpu_time": 3.7936665511811024e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 827, - "real_time": 8.1769454246781037e-01, - "cpu_time": 8.1765177992745008e-01, + "iterations": 672, + "real_time": 9.9142064296063925e-01, + "cpu_time": 9.9135187946428460e-01, "time_unit": "ms" }, { @@ -241,9 +241,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 861, - "real_time": 7.9650212507630058e-01, - "cpu_time": 7.9645093612078921e-01, + "iterations": 684, + "real_time": 9.8383305213081906e-01, + 
"cpu_time": 9.8378608333333095e-01, "time_unit": "ms" }, { @@ -255,9 +255,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5101, - "real_time": 1.3665298364765854e-01, - "cpu_time": 1.3664968535581246e-01, + "iterations": 4933, + "real_time": 1.4217775303718649e-01, + "cpu_time": 1.4216965781471705e-01, "time_unit": "ms" }, { @@ -269,9 +269,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3187, - "real_time": 2.1934081926815630e-01, - "cpu_time": 2.1933629055538131e-01, + "iterations": 3207, + "real_time": 2.2116656922498595e-01, + "cpu_time": 2.2115779700654803e-01, "time_unit": "ms" }, { @@ -283,9 +283,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3142, - "real_time": 2.2223215659263407e-01, - "cpu_time": 2.2222677021005652e-01, + "iterations": 3201, + "real_time": 2.1932033547867988e-01, + "cpu_time": 2.1931359012808557e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2731, - "real_time": 2.5569188675571386e-01, - "cpu_time": 2.5568335042109103e-01, + "iterations": 2623, + "real_time": 2.5874181724076412e-01, + "cpu_time": 2.5873152497140717e-01, "time_unit": "ms" }, { @@ -311,9 +311,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2740, - "real_time": 2.5545844373150461e-01, - "cpu_time": 2.5545215291970780e-01, + "iterations": 2746, + "real_time": 2.5197450893687579e-01, + "cpu_time": 2.5196135906773559e-01, "time_unit": "ms" }, { @@ -325,9 +325,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2815, - "real_time": 2.4856514103882690e-01, - "cpu_time": 2.4855898934280626e-01, + "iterations": 2650, + "real_time": 2.6740114503311663e-01, + "cpu_time": 2.6738492188679175e-01, "time_unit": "ms" }, { @@ -339,9 +339,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 5206, - "real_time": 1.3451610773778905e-01, - "cpu_time": 
1.3450989838647703e-01, + "iterations": 4842, + "real_time": 1.4409735623419359e-01, + "cpu_time": 1.4409410636100756e-01, "time_unit": "ms" } ] diff --git a/test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log b/test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log index f576c1e9..34d10b59 100644 --- a/test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log +++ b/test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log @@ -1,4 +1,4 @@ -2025-06-01T10:03:20+00:00 +2025-09-07T13:06:33+00:00 Running ./bin/image-processing-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,33 +6,33 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.02, 1.38, 2.27 +Load Average: 2.90, 4.53, 6.22 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
-------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------------------- -Eigen_Convolve2D/1 34.2 ms 34.2 ms 20 -MLIR_Conv2D/1 119 ms 119 ms 6 -Buddy_Conv2D/1 10.5 ms 10.5 ms 67 -Buddy_Corr2D_Constant_Padding/1 7.89 ms 7.89 ms 90 -OpenCV_Filter2D_Constant_Padding/1 5.89 ms 5.89 ms 119 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.143 ms 0.143 ms 4857 -Buddy_Resize2D_Bilinear_Interpolation/1 0.260 ms 0.260 ms 2690 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 105068 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 49449 -Buddy_Erosion2D_Constant_Padding/1 0.214 ms 0.214 ms 3244 -Buddy_Dilation2D_Constant_Padding/1 0.225 ms 0.225 ms 3243 -Buddy_Opening2D_Constant_Padding/1 0.307 ms 0.307 ms 2260 -Buddy_Closing2D_Constant_Padding/1 0.313 ms 0.313 ms 2223 -Buddy_TopHat2D_Constant_Padding/1 0.818 ms 0.818 ms 827 -Buddy_BottomHat2D_Constant_Padding/1 0.797 ms 0.796 ms 861 -OpenCV_Erode2D_Constant_Padding/1 0.137 ms 0.137 ms 5101 -OpenCV_Opening2D_Constant_Padding/1 0.219 ms 0.219 ms 3187 -OpenCV_Closing2D_Constant_Padding/1 0.222 ms 0.222 ms 3142 -OpenCV_TopHat2D_Constant_Padding/1 0.256 ms 0.256 ms 2731 -OpenCV_BottomHat2D_Constant_Padding/1 0.255 ms 0.255 ms 2740 -OpenCV_MorphGrad2D_Constant_Padding/1 0.249 ms 0.249 ms 2815 -OpenCV_Dilate2D_Constant_Padding/1 0.135 ms 0.135 ms 5206 +Eigen_Convolve2D/1 36.0 ms 36.0 ms 19 +MLIR_Conv2D/1 124 ms 124 ms 6 +Buddy_Conv2D/1 11.4 ms 11.4 ms 61 +Buddy_Corr2D_Constant_Padding/1 8.07 ms 8.07 ms 86 +OpenCV_Filter2D_Constant_Padding/1 6.05 ms 6.05 ms 116 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.149 ms 0.149 ms 4547 +Buddy_Resize2D_Bilinear_Interpolation/1 0.272 ms 0.272 ms 2567 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 102031 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47780 
+Buddy_Erosion2D_Constant_Padding/1 0.235 ms 0.235 ms 2938 +Buddy_Dilation2D_Constant_Padding/1 0.236 ms 0.236 ms 2922 +Buddy_Opening2D_Constant_Padding/1 0.363 ms 0.363 ms 1840 +Buddy_Closing2D_Constant_Padding/1 0.379 ms 0.379 ms 1905 +Buddy_TopHat2D_Constant_Padding/1 0.991 ms 0.991 ms 672 +Buddy_BottomHat2D_Constant_Padding/1 0.984 ms 0.984 ms 684 +OpenCV_Erode2D_Constant_Padding/1 0.142 ms 0.142 ms 4933 +OpenCV_Opening2D_Constant_Padding/1 0.221 ms 0.221 ms 3207 +OpenCV_Closing2D_Constant_Padding/1 0.219 ms 0.219 ms 3201 +OpenCV_TopHat2D_Constant_Padding/1 0.259 ms 0.259 ms 2623 +OpenCV_BottomHat2D_Constant_Padding/1 0.252 ms 0.252 ms 2746 +OpenCV_MorphGrad2D_Constant_Padding/1 0.267 ms 0.267 ms 2650 +OpenCV_Dilate2D_Constant_Padding/1 0.144 ms 0.144 ms 4842 Saved PNG file. Saved PNG file. Saved PNG file. diff --git a/test_result/imageprocessing/image-processing-result.log b/test_result/imageprocessing/image-processing-result.log index 3a6bab16..00654642 100644 --- a/test_result/imageprocessing/image-processing-result.log +++ b/test_result/imageprocessing/image-processing-result.log @@ -1,4 +1,4 @@ -Benchmark results - Sun Jun 1 09:59:37 UTC 2025 +Benchmark results - Sun Sep 7 13:02:48 UTC 2025 Testing SSE support SSE is supported. 
Running image-processing-benchmark for SSE diff --git a/test_result/vectorization/vectorization_matrix.json b/test_result/vectorization/vectorization_matrix.json index b8b5ca10..e63f2898 100644 --- a/test_result/vectorization/vectorization_matrix.json +++ b/test_result/vectorization/vectorization_matrix.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-06-01T10:11:11+00:00", + "date": "2025-09-07T13:14:33+00:00", "host_name": "4ed4bacfe45d", "executable": "./vectorization-matrix-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [1,1.08789,1.76172], + "load_avg": [3.06299,3.26074,4.88916], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 37302822, - "real_time": 1.8817115302162652e+01, - "cpu_time": 1.8816787319736829e+01, + "iterations": 36245532, + "real_time": 1.9330042278500677e+01, + "cpu_time": 1.9329765362527993e+01, "time_unit": "ns" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 35030976, - "real_time": 2.0450598917981729e+01, - "cpu_time": 2.0450241780303237e+01, + "iterations": 33833597, + "real_time": 2.1012985950814887e+01, + "cpu_time": 2.1012694364125689e+01, "time_unit": "ns" } ] diff --git a/test_result/vectorization/vectorization_matrix.log b/test_result/vectorization/vectorization_matrix.log index 0fb91b1e..11e33aa6 100644 --- a/test_result/vectorization/vectorization_matrix.log +++ b/test_result/vectorization/vectorization_matrix.log @@ -1,4 +1,4 @@ -2025-06-01T10:11:11+00:00 +2025-09-07T13:14:33+00:00 Running ./vectorization-matrix-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.09, 1.76 +Load Average: 3.06, 3.26, 4.89 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
-------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------- -MLIR_MatMul/1 18.8 ns 18.8 ns 37302822 -MLIR_MatVec/1 20.5 ns 20.5 ns 35030976 +MLIR_MatMul/1 19.3 ns 19.3 ns 36245532 +MLIR_MatVec/1 21.0 ns 21.0 ns 33833597 -------------------------------------------------------- MLIR_MatMul: MLIR MatMul Operation + Nested Loop [ 18 18 18 18 18 18 18 18 18 18 ] diff --git a/test_result/vectorization/vectorization_result.log b/test_result/vectorization/vectorization_result.log index c1bb63ff..fd5ab9b0 100644 --- a/test_result/vectorization/vectorization_result.log +++ b/test_result/vectorization/vectorization_result.log @@ -1,4 +1,4 @@ -Vectorization Benchmark - Sun Jun 1 10:11:04 UTC 2025 +Vectorization Benchmark - Sun Sep 7 13:14:27 UTC 2025 [Info] Starting vectorization-matrix-benchmark build... [Info] Running CMake configuration... -- The CXX compiler identification is GNU 11.4.0 @@ -39,8 +39,8 @@ Vectorization Benchmark - Sun Jun 1 10:11:04 UTC 2025 [Info] Building vectorization-matrix-benchmark... [1/17] Generating mlir-matmul.o [2/17] Generating mlir-matvec.o -[3/17] Linking CXX static library benchmarks/Vectorization/libMLIRMatMul.a -[4/17] Linking CXX static library benchmarks/Vectorization/libMLIRMatVec.a +[3/17] Linking CXX static library benchmarks/Vectorization/libMLIRMatVec.a +[4/17] Linking CXX static library benchmarks/Vectorization/libMLIRMatMul.a [5/17] Creating directories for 'project_googlebenchmark' [6/17] Performing download step (git clone) for 'project_googlebenchmark' Cloning into 'project_googlebenchmark'... 
@@ -128,23 +128,23 @@ Call Stack (most recent call first): -- Generating done -- Build files have been written to: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/src/project_googlebenchmark-build [10/17] Performing build step for 'project_googlebenchmark' -[1/22] Building CXX object src/CMakeFiles/benchmark.dir/colorprint.cc.o -[2/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark_name.cc.o -[3/22] Building CXX object src/CMakeFiles/benchmark_main.dir/benchmark_main.cc.o -[4/22] Building CXX object src/CMakeFiles/benchmark.dir/sleep.cc.o +[1/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark_name.cc.o +[2/22] Building CXX object src/CMakeFiles/benchmark_main.dir/benchmark_main.cc.o +[3/22] Building CXX object src/CMakeFiles/benchmark.dir/sleep.cc.o +[4/22] Building CXX object src/CMakeFiles/benchmark.dir/perf_counters.cc.o [5/22] Building CXX object src/CMakeFiles/benchmark.dir/timers.cc.o -[6/22] Building CXX object src/CMakeFiles/benchmark.dir/perf_counters.cc.o -[7/22] Building CXX object src/CMakeFiles/benchmark.dir/csv_reporter.cc.o -[8/22] Building CXX object src/CMakeFiles/benchmark.dir/counter.cc.o +[6/22] Building CXX object src/CMakeFiles/benchmark.dir/colorprint.cc.o +[7/22] Building CXX object src/CMakeFiles/benchmark.dir/counter.cc.o +[8/22] Building CXX object src/CMakeFiles/benchmark.dir/csv_reporter.cc.o [9/22] Building CXX object src/CMakeFiles/benchmark.dir/reporter.cc.o [10/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark_api_internal.cc.o [11/22] Building CXX object src/CMakeFiles/benchmark.dir/commandlineflags.cc.o -[12/22] Building CXX object src/CMakeFiles/benchmark.dir/complexity.cc.o +[12/22] Building CXX object src/CMakeFiles/benchmark.dir/console_reporter.cc.o [13/22] Building CXX object src/CMakeFiles/benchmark.dir/string_util.cc.o -[14/22] Building CXX object src/CMakeFiles/benchmark.dir/console_reporter.cc.o -[15/22] Building CXX object 
src/CMakeFiles/benchmark.dir/json_reporter.cc.o -[16/22] Building CXX object src/CMakeFiles/benchmark.dir/sysinfo.cc.o -[17/22] Building CXX object src/CMakeFiles/benchmark.dir/statistics.cc.o +[14/22] Building CXX object src/CMakeFiles/benchmark.dir/complexity.cc.o +[15/22] Building CXX object src/CMakeFiles/benchmark.dir/statistics.cc.o +[16/22] Building CXX object src/CMakeFiles/benchmark.dir/json_reporter.cc.o +[17/22] Building CXX object src/CMakeFiles/benchmark.dir/sysinfo.cc.o [18/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark_runner.cc.o [19/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark.cc.o [20/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark_register.cc.o @@ -176,11 +176,11 @@ Call Stack (most recent call first): [12/17] No test step for 'project_googlebenchmark' [13/17] Completed 'project_googlebenchmark' [14/17] Building CXX object benchmarks/Vectorization/CMakeFiles/vectorization-matrix-benchmark.dir/Main.cpp.o -[15/17] Building CXX object benchmarks/Vectorization/CMakeFiles/vectorization-matrix-benchmark.dir/MLIRMatVecBenchmark.cpp.o -[16/17] Building CXX object benchmarks/Vectorization/CMakeFiles/vectorization-matrix-benchmark.dir/MLIRMatMulBenchmark.cpp.o +[15/17] Building CXX object benchmarks/Vectorization/CMakeFiles/vectorization-matrix-benchmark.dir/MLIRMatMulBenchmark.cpp.o +[16/17] Building CXX object benchmarks/Vectorization/CMakeFiles/vectorization-matrix-benchmark.dir/MLIRMatVecBenchmark.cpp.o [17/17] Linking CXX executable bin/vectorization-matrix-benchmark [Info] Running vectorization-matrix-benchmark... 
-2025-06-01T10:11:11+00:00 +2025-09-07T13:14:33+00:00 Running ./vectorization-matrix-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -188,13 +188,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 1.00, 1.09, 1.76 +Load Average: 3.06, 3.26, 4.89 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. -------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------- -MLIR_MatMul/1 18.8 ns 18.8 ns 37302822 -MLIR_MatVec/1 20.5 ns 20.5 ns 35030976 +MLIR_MatMul/1 19.3 ns 19.3 ns 36245532 +MLIR_MatVec/1 21.0 ns 21.0 ns 33833597 -------------------------------------------------------- MLIR_MatMul: MLIR MatMul Operation + Nested Loop [ 18 18 18 18 18 18 18 18 18 18 ] From ee1d3eaa0c9e1c153b0ad4c138031d711257b4d8 Mon Sep 17 00:00:00 2001 From: Q Liu <52538137+LIUQyou@users.noreply.github.com> Date: Sun, 7 Sep 2025 16:12:58 +0200 Subject: [PATCH 44/52] update --- .github/workflows/bench.yml | 10 +- .../build_results_crosscompile_summary.log | 28 +---- .../deeplearning/build_results_summary.log | 32 +++++ .../deeplearning/run_results_summary.log | 109 +++++------------- 4 files changed, 70 insertions(+), 109 deletions(-) diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml index 2d87e35e..cc2bb358 100755 --- a/.github/workflows/bench.yml +++ b/.github/workflows/bench.yml @@ -33,7 +33,15 @@ jobs: # ------------------------------------------------------------ - name: Build & run benchmarks run: | - /home/quliu/buddy-complier-workspace/run_docker.sh + set -e + TEST_DIR=/home/quliu/buddy-complier-workspace/buddy-benchmark/test_result + if [ -d "$TEST_DIR" ] && [ "$(find "$TEST_DIR" -type f | wc -l)" -gt 0 ]; then + echo "[Skip] $TEST_DIR already has benchmark outputs; skipping build/run." 
+ echo "[Info] File count: $(find "$TEST_DIR" -type f | wc -l)" + else + echo "[Run] No existing results detected; running benchmarks in Docker." + /home/quliu/buddy-complier-workspace/run_docker.sh + fi # ------------------------------------------------------------ # 2½) decide which date folder we’re about to publish # ------------------------------------------------------------ diff --git a/test_result/deeplearning/build_results_crosscompile_summary.log b/test_result/deeplearning/build_results_crosscompile_summary.log index eaa29439..461312f5 100644 --- a/test_result/deeplearning/build_results_crosscompile_summary.log +++ b/test_result/deeplearning/build_results_crosscompile_summary.log @@ -1,29 +1,3 @@ [Failed] Build of 'dl-model-tinyllama-benchmark' [Failed] Build of 'dl-model-mobilenetv3-benchmark' -[Failed] Build of 'dl-model-lenet-benchmark' -[Failed] Build of 'dl-model-bert-benchmark' -[Failed] Build of 'dl-model-whisper-benchmark' -[Failed] Build of 'dl-model-resnet18-benchmark' -[Failed] Build of 'dl-layer-ffn-benchmark' -[Failed] Build of 'dl-layer-selfattention-benchmark' -[Failed] Build of 'dl-layer-rmsnorm-benchmark' -[Failed] Build of 'dl-op-linalg-matmul-benchmark' -[Failed] Build of 'dl-op-linalg-conv2d-nchw-fchw-benchmark' -[Failed] Build of 'dl-op-linalg-conv2d-nhwc-hwcf-benchmark' -[Failed] Build of 'dl-op-linalg-conv2d-nhwc-fhwc-benchmark' -[Failed] Build of 'dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark' -[Failed] Build of 'dl-op-linalg-pooling-nhwc-sum-benchmark' -[Failed] Build of 'dl-op-linalg-batch-matmul-benchmark' -[Failed] Build of 'dl-op-linalg-arithaddf-benchmark' -[Failed] Build of 'dl-op-linalg-arithdivf-benchmark' -[Failed] Build of 'dl-op-linalg-arithmulf-benchmark' -[Failed] Build of 'dl-op-linalg-arithnegf-benchmark' -[Failed] Build of 'dl-op-linalg-arithsubf-benchmark' -[Failed] Build of 'dl-op-linalg-mathfpow-benchmark' -[Failed] Build of 'dl-op-linalg-mathrsqrt-benchmark' -[Failed] Build of 
'dl-op-linalg-mathexp-benchmark' -[Failed] Build of 'dl-op-linalg-reduceaddf-benchmark' -[Failed] Build of 'dl-op-linalg-reducemaxf-benchmark' -[Failed] Build of 'dl-op-linalg-softmax-exp-sum-div-benchmark' -[Failed] Build of 'dl-op-tosa-transpose-benchmark' -[Failed] Build of 'dl-op-matmul-transpose-b-benchmark' +[Success] Build of 'dl-model-lenet-benchmark' diff --git a/test_result/deeplearning/build_results_summary.log b/test_result/deeplearning/build_results_summary.log index e69de29b..90bf162d 100644 --- a/test_result/deeplearning/build_results_summary.log +++ b/test_result/deeplearning/build_results_summary.log @@ -0,0 +1,32 @@ +[Failed] Build of 'dl-model-mobilenetv3-benchmark' +[Failed] Build of 'dl-model-lenet-benchmark' +[Failed] Build of 'dl-model-bert-benchmark' +[Failed] Build of 'dl-model-whisper-benchmark' +[Failed] Build of 'dl-model-resnet18-benchmark' +[Failed] Build of 'dl-layer-ffn-benchmark' +[Failed] Build of 'dl-layer-selfattention-benchmark' +[Failed] Build of 'dl-layer-rmsnorm-benchmark' +[Failed] Build of 'dl-op-linalg-matmul-benchmark' +[Failed] Build of 'dl-op-linalg-conv2d-nchw-fchw-benchmark' +[Failed] Build of 'dl-op-linalg-conv2d-nhwc-hwcf-benchmark' +[Failed] Build of 'dl-op-linalg-conv2d-nhwc-fhwc-benchmark' +[Failed] Build of 'dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark' +[Failed] Build of 'dl-op-linalg-pooling-nhwc-sum-benchmark' +[Failed] Build of 'dl-op-linalg-batch-matmul-benchmark' +[Failed] Build of 'dl-op-linalg-arithaddf-benchmark' +[Failed] Build of 'dl-op-linalg-arithdivf-benchmark' +[Failed] Build of 'dl-op-linalg-arithmulf-benchmark' +[Failed] Build of 'dl-op-linalg-arithnegf-benchmark' +[Failed] Build of 'dl-op-linalg-arithsubf-benchmark' +[Failed] Build of 'dl-op-linalg-mathfpow-benchmark' +[Failed] Build of 'dl-op-linalg-mathrsqrt-benchmark' +[Failed] Build of 'dl-op-linalg-mathexp-benchmark' +[Failed] Build of 'dl-op-linalg-reduceaddf-benchmark' +[Failed] Build of 'dl-op-linalg-reducemaxf-benchmark' +[Failed] 
Build of 'dl-op-linalg-softmax-exp-sum-div-benchmark' +[Failed] Build of 'dl-op-tosa-transpose-benchmark' +[Failed] Build of 'dl-op-matmul-transpose-b-benchmark' +[Failed] Build of 'dl-model-tinyllama-benchmark' +[Failed] Build of 'dl-model-mobilenetv3-benchmark' +[Success] Build of 'dl-model-lenet-benchmark' +[Failed] Build of 'dl-model-bert-benchmark' diff --git a/test_result/deeplearning/run_results_summary.log b/test_result/deeplearning/run_results_summary.log index b6ac768a..e39e6015 100644 --- a/test_result/deeplearning/run_results_summary.log +++ b/test_result/deeplearning/run_results_summary.log @@ -1,82 +1,29 @@ -[Success] Run of 'dl-model-tinyllama-benchmark' - ↳ stdout/stderr → dl-model-tinyllama-benchmark.log - ↳ gbench JSON → dl-model-tinyllama-benchmark.json -[Success] Run of 'dl-model-mobilenetv3-benchmark' - ↳ stdout/stderr → dl-model-mobilenetv3-benchmark.log - ↳ gbench JSON → dl-model-mobilenetv3-benchmark.json -[Success] Run of 'dl-model-lenet-benchmark' - ↳ stdout/stderr → dl-model-lenet-benchmark.log - ↳ gbench JSON → dl-model-lenet-benchmark.json +[Missing] Executable not found for 'dl-model-tinyllama-benchmark' +[Missing] Executable not found for 'dl-model-mobilenetv3-benchmark' +[Missing] Executable not found for 'dl-model-lenet-benchmark' [Missing] Executable not found for 'dl-model-bert-benchmark' -[Failed] Run of 'dl-model-whisper-benchmark' - ↳ stdout/stderr → dl-model-whisper-benchmark.log (may contain errors) -[Success] Run of 'dl-model-resnet18-benchmark' - ↳ stdout/stderr → dl-model-resnet18-benchmark.log - ↳ gbench JSON → dl-model-resnet18-benchmark.json -[Success] Run of 'dl-layer-ffn-benchmark' - ↳ stdout/stderr → dl-layer-ffn-benchmark.log - ↳ gbench JSON → dl-layer-ffn-benchmark.json -[Success] Run of 'dl-layer-selfattention-benchmark' - ↳ stdout/stderr → dl-layer-selfattention-benchmark.log - ↳ gbench JSON → dl-layer-selfattention-benchmark.json -[Success] Run of 'dl-layer-rmsnorm-benchmark' - ↳ stdout/stderr → 
dl-layer-rmsnorm-benchmark.log - ↳ gbench JSON → dl-layer-rmsnorm-benchmark.json -[Success] Run of 'dl-op-linalg-matmul-benchmark' - ↳ stdout/stderr → dl-op-linalg-matmul-benchmark.log - ↳ gbench JSON → dl-op-linalg-matmul-benchmark.json -[Success] Run of 'dl-op-linalg-conv2d-nchw-fchw-benchmark' - ↳ stdout/stderr → dl-op-linalg-conv2d-nchw-fchw-benchmark.log - ↳ gbench JSON → dl-op-linalg-conv2d-nchw-fchw-benchmark.json -[Success] Run of 'dl-op-linalg-conv2d-nhwc-hwcf-benchmark' - ↳ stdout/stderr → dl-op-linalg-conv2d-nhwc-hwcf-benchmark.log - ↳ gbench JSON → dl-op-linalg-conv2d-nhwc-hwcf-benchmark.json -[Success] Run of 'dl-op-linalg-conv2d-nhwc-fhwc-benchmark' - ↳ stdout/stderr → dl-op-linalg-conv2d-nhwc-fhwc-benchmark.log - ↳ gbench JSON → dl-op-linalg-conv2d-nhwc-fhwc-benchmark.json -[Success] Run of 'dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark' - ↳ stdout/stderr → dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.log - ↳ gbench JSON → dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.json -[Success] Run of 'dl-op-linalg-pooling-nhwc-sum-benchmark' - ↳ stdout/stderr → dl-op-linalg-pooling-nhwc-sum-benchmark.log - ↳ gbench JSON → dl-op-linalg-pooling-nhwc-sum-benchmark.json -[Success] Run of 'dl-op-linalg-batch-matmul-benchmark' - ↳ stdout/stderr → dl-op-linalg-batch-matmul-benchmark.log - ↳ gbench JSON → dl-op-linalg-batch-matmul-benchmark.json -[Success] Run of 'dl-op-linalg-arithaddf-benchmark' - ↳ stdout/stderr → dl-op-linalg-arithaddf-benchmark.log - ↳ gbench JSON → dl-op-linalg-arithaddf-benchmark.json -[Success] Run of 'dl-op-linalg-arithdivf-benchmark' - ↳ stdout/stderr → dl-op-linalg-arithdivf-benchmark.log - ↳ gbench JSON → dl-op-linalg-arithdivf-benchmark.json -[Success] Run of 'dl-op-linalg-arithmulf-benchmark' - ↳ stdout/stderr → dl-op-linalg-arithmulf-benchmark.log - ↳ gbench JSON → dl-op-linalg-arithmulf-benchmark.json -[Success] Run of 'dl-op-linalg-arithnegf-benchmark' - ↳ stdout/stderr → dl-op-linalg-arithnegf-benchmark.log - ↳ gbench 
JSON → dl-op-linalg-arithnegf-benchmark.json -[Success] Run of 'dl-op-linalg-arithsubf-benchmark' - ↳ stdout/stderr → dl-op-linalg-arithsubf-benchmark.log - ↳ gbench JSON → dl-op-linalg-arithsubf-benchmark.json -[Success] Run of 'dl-op-linalg-mathfpow-benchmark' - ↳ stdout/stderr → dl-op-linalg-mathfpow-benchmark.log - ↳ gbench JSON → dl-op-linalg-mathfpow-benchmark.json -[Success] Run of 'dl-op-linalg-mathrsqrt-benchmark' - ↳ stdout/stderr → dl-op-linalg-mathrsqrt-benchmark.log - ↳ gbench JSON → dl-op-linalg-mathrsqrt-benchmark.json -[Success] Run of 'dl-op-linalg-mathexp-benchmark' - ↳ stdout/stderr → dl-op-linalg-mathexp-benchmark.log - ↳ gbench JSON → dl-op-linalg-mathexp-benchmark.json -[Failed] Run of 'dl-op-linalg-reduceaddf-benchmark' - ↳ stdout/stderr → dl-op-linalg-reduceaddf-benchmark.log (may contain errors) -[Failed] Run of 'dl-op-linalg-reducemaxf-benchmark' - ↳ stdout/stderr → dl-op-linalg-reducemaxf-benchmark.log (may contain errors) -[Success] Run of 'dl-op-linalg-softmax-exp-sum-div-benchmark' - ↳ stdout/stderr → dl-op-linalg-softmax-exp-sum-div-benchmark.log - ↳ gbench JSON → dl-op-linalg-softmax-exp-sum-div-benchmark.json -[Success] Run of 'dl-op-tosa-transpose-benchmark' - ↳ stdout/stderr → dl-op-tosa-transpose-benchmark.log - ↳ gbench JSON → dl-op-tosa-transpose-benchmark.json -[Success] Run of 'dl-op-matmul-transpose-b-benchmark' - ↳ stdout/stderr → dl-op-matmul-transpose-b-benchmark.log - ↳ gbench JSON → dl-op-matmul-transpose-b-benchmark.json +[Missing] Executable not found for 'dl-model-whisper-benchmark' +[Missing] Executable not found for 'dl-model-resnet18-benchmark' +[Missing] Executable not found for 'dl-layer-ffn-benchmark' +[Missing] Executable not found for 'dl-layer-selfattention-benchmark' +[Missing] Executable not found for 'dl-layer-rmsnorm-benchmark' +[Missing] Executable not found for 'dl-op-linalg-matmul-benchmark' +[Missing] Executable not found for 'dl-op-linalg-conv2d-nchw-fchw-benchmark' +[Missing] Executable not found 
for 'dl-op-linalg-conv2d-nhwc-hwcf-benchmark' +[Missing] Executable not found for 'dl-op-linalg-conv2d-nhwc-fhwc-benchmark' +[Missing] Executable not found for 'dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark' +[Missing] Executable not found for 'dl-op-linalg-pooling-nhwc-sum-benchmark' +[Missing] Executable not found for 'dl-op-linalg-batch-matmul-benchmark' +[Missing] Executable not found for 'dl-op-linalg-arithaddf-benchmark' +[Missing] Executable not found for 'dl-op-linalg-arithdivf-benchmark' +[Missing] Executable not found for 'dl-op-linalg-arithmulf-benchmark' +[Missing] Executable not found for 'dl-op-linalg-arithnegf-benchmark' +[Missing] Executable not found for 'dl-op-linalg-arithsubf-benchmark' +[Missing] Executable not found for 'dl-op-linalg-mathfpow-benchmark' +[Missing] Executable not found for 'dl-op-linalg-mathrsqrt-benchmark' +[Missing] Executable not found for 'dl-op-linalg-mathexp-benchmark' +[Missing] Executable not found for 'dl-op-linalg-reduceaddf-benchmark' +[Missing] Executable not found for 'dl-op-linalg-reducemaxf-benchmark' +[Missing] Executable not found for 'dl-op-linalg-softmax-exp-sum-div-benchmark' +[Missing] Executable not found for 'dl-op-tosa-transpose-benchmark' +[Missing] Executable not found for 'dl-op-matmul-transpose-b-benchmark' From 0d88fec17edf8a95b25fe74c459b64c6b3cf87ed Mon Sep 17 00:00:00 2001 From: Q Liu <52538137+LIUQyou@users.noreply.github.com> Date: Sun, 7 Sep 2025 16:18:07 +0200 Subject: [PATCH 45/52] update --- .github/workflows/bench.yml | 50 +++++++++---------- .../build_results_crosscompile_summary.log | 1 + 2 files changed, 26 insertions(+), 25 deletions(-) diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml index cc2bb358..0c041f18 100755 --- a/.github/workflows/bench.yml +++ b/.github/workflows/bench.yml @@ -80,8 +80,8 @@ jobs: rm -rf latest mkdir -p latest cat > latest/index.html < - EOF + +EOF echo "[Info] benchmarks/latest now points to ../${latest}/" # 
------------------------------------------------------------ @@ -92,8 +92,8 @@ jobs: run: | set -e cat > benchmarks/index.html <<'EOF' - - EOF + +EOF - name: Upload site artifact uses: actions/upload-pages-artifact@v3 @@ -106,22 +106,22 @@ jobs: mkdir -p "$run_root" cat > "$run_root/index.html" <<'EOF' - --- - layout: default - title: Benchmark run - nav_exclude: true - --- +--- +layout: default +title: Benchmark run +nav_exclude: true +--- -

            Benchmark results

            +

            Benchmark results

            -
              - {% for f in site.static_files %} - {% if f.path contains page.dir and f.name != "index.html" and f.extname == ".html" %} -
            • {{ f.name }}
            • - {% endif %} - {% endfor %} -
            - EOF +
              +{% for f in site.static_files %} + {% if f.path contains page.dir and f.name != "index.html" and f.extname == ".html" %} +
            • {{ f.name }}
            • + {% endif %} +{% endfor %} +
            +EOF - name: Build top-level benchmarks index (list all runs) @@ -132,13 +132,13 @@ jobs: mkdir -p benchmarks { cat <<'HTML' - --- - layout: default - title: Benchmarks - --- -

            Benchmark runs

            -

            Select a date and commit:

            - HTML +--- +layout: default +title: Benchmarks +--- +

            Benchmark runs

            +

            Select a date and commit:

            +HTML # List dates newest first for d in $(ls -1d benchmarks/20*/ | sort -r); do diff --git a/test_result/deeplearning/build_results_crosscompile_summary.log b/test_result/deeplearning/build_results_crosscompile_summary.log index 461312f5..7d53f5be 100644 --- a/test_result/deeplearning/build_results_crosscompile_summary.log +++ b/test_result/deeplearning/build_results_crosscompile_summary.log @@ -1,3 +1,4 @@ [Failed] Build of 'dl-model-tinyllama-benchmark' [Failed] Build of 'dl-model-mobilenetv3-benchmark' [Success] Build of 'dl-model-lenet-benchmark' +[Failed] Build of 'dl-model-bert-benchmark' From 047e865d053b22dc9d941da40c61fa151dbb1a8c Mon Sep 17 00:00:00 2001 From: Q Liu <52538137+LIUQyou@users.noreply.github.com> Date: Sun, 7 Sep 2025 16:21:26 +0200 Subject: [PATCH 46/52] [CI] Fix heredoc indentation and add test_result skip --- .github/workflows/bench.yml | 50 ++++++++++++++++++------------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml index 0c041f18..cc2bb358 100755 --- a/.github/workflows/bench.yml +++ b/.github/workflows/bench.yml @@ -80,8 +80,8 @@ jobs: rm -rf latest mkdir -p latest cat > latest/index.html < -EOF + + EOF echo "[Info] benchmarks/latest now points to ../${latest}/" # ------------------------------------------------------------ @@ -92,8 +92,8 @@ EOF run: | set -e cat > benchmarks/index.html <<'EOF' - -EOF + + EOF - name: Upload site artifact uses: actions/upload-pages-artifact@v3 @@ -106,22 +106,22 @@ EOF mkdir -p "$run_root" cat > "$run_root/index.html" <<'EOF' ---- -layout: default -title: Benchmark run -nav_exclude: true ---- + --- + layout: default + title: Benchmark run + nav_exclude: true + --- -

            Benchmark results

            +

            Benchmark results

            -
              -{% for f in site.static_files %} - {% if f.path contains page.dir and f.name != "index.html" and f.extname == ".html" %} -
            • {{ f.name }}
            • - {% endif %} -{% endfor %} -
            -EOF +
              + {% for f in site.static_files %} + {% if f.path contains page.dir and f.name != "index.html" and f.extname == ".html" %} +
            • {{ f.name }}
            • + {% endif %} + {% endfor %} +
            + EOF - name: Build top-level benchmarks index (list all runs) @@ -132,13 +132,13 @@ EOF mkdir -p benchmarks { cat <<'HTML' ---- -layout: default -title: Benchmarks ---- -

            Benchmark runs

            -

            Select a date and commit:

            -HTML + --- + layout: default + title: Benchmarks + --- +

            Benchmark runs

            +

            Select a date and commit:

            + HTML # List dates newest first for d in $(ls -1d benchmarks/20*/ | sort -r); do From 9e96d50608f93895e857a6f0f3220f219b5fcdfa Mon Sep 17 00:00:00 2001 From: Q Liu <52538137+LIUQyou@users.noreply.github.com> Date: Sun, 7 Sep 2025 16:24:01 +0200 Subject: [PATCH 47/52] update --- .github/workflows/bench.yml | 8 +- .../build_results_crosscompile_summary.log | 50 ++++++ .../deeplearning/build_results_summary.log | 25 +++ .../deeplearning/dl-layer-ffn-benchmark.log | 18 -- .../dl-layer-rmsnorm-benchmark.log | 18 -- .../dl-layer-selfattention-benchmark.log | 18 -- .../deeplearning/dl-model-lenet-benchmark.log | 19 --- .../dl-op-linalg-arithaddf-benchmark.log | 19 --- .../dl-op-linalg-arithdivf-benchmark.log | 19 --- .../dl-op-linalg-arithmulf-benchmark.log | 19 --- .../dl-op-linalg-arithnegf-benchmark.log | 19 --- .../dl-op-linalg-arithsubf-benchmark.log | 19 --- ...l-op-linalg-conv2d-nchw-fchw-benchmark.log | 19 --- ...l-op-linalg-conv2d-nhwc-fhwc-benchmark.log | 21 --- ...l-op-linalg-conv2d-nhwc-hwcf-benchmark.log | 19 --- ...g-depthwise-conv-2d-nhwc-hwc-benchmark.log | 20 +-- .../dl-op-linalg-mathexp-benchmark.log | 19 --- .../dl-op-linalg-mathfpow-benchmark.log | 19 --- .../dl-op-linalg-mathrsqrt-benchmark.log | 19 --- ...l-op-linalg-pooling-nhwc-sum-benchmark.log | 19 --- .../dl-op-linalg-reduceaddf-benchmark.log | 10 -- .../dl-op-linalg-reducemaxf-benchmark.log | 10 -- ...p-linalg-softmax-exp-sum-div-benchmark.log | 19 --- .../deeplearning/run_results_summary.log | 60 ++++--- ...dom3x3KernelAlignInt_CONSTANT_PADDING.json | 136 +++++++-------- ...ndom3x3KernelAlignInt_CONSTANT_PADDING.log | 48 +++--- ...om3x3KernelAlignInt_REPLICATE_PADDING.json | 156 +++++++----------- ...dom3x3KernelAlignInt_REPLICATE_PADDING.log | 67 +++----- ...dom3x3KernelAlignInt_CONSTANT_PADDING.json | 134 +++++++-------- ...ndom3x3KernelAlignInt_CONSTANT_PADDING.log | 48 +++--- ...om3x3KernelAlignInt_REPLICATE_PADDING.json | 134 +++++++-------- 
...dom3x3KernelAlignInt_REPLICATE_PADDING.log | 48 +++--- ...dom3x3KernelAlignInt_CONSTANT_PADDING.json | 136 +++++++-------- ...ndom3x3KernelAlignInt_CONSTANT_PADDING.log | 48 +++--- ...om3x3KernelAlignInt_REPLICATE_PADDING.json | 136 +++++++-------- ...dom3x3KernelAlignInt_REPLICATE_PADDING.log | 48 +++--- ...dom3x3KernelAlignInt_CONSTANT_PADDING.json | 130 +++++++-------- ...ndom3x3KernelAlignInt_CONSTANT_PADDING.log | 46 +++--- ...om3x3KernelAlignInt_REPLICATE_PADDING.json | 132 +++++++-------- ...dom3x3KernelAlignInt_REPLICATE_PADDING.log | 46 +++--- ...dom3x3KernelAlignInt_CONSTANT_PADDING.json | 132 +++++++-------- ...ndom3x3KernelAlignInt_CONSTANT_PADDING.log | 48 +++--- ...om3x3KernelAlignInt_REPLICATE_PADDING.json | 128 +++++++------- ...dom3x3KernelAlignInt_REPLICATE_PADDING.log | 48 +++--- ...dom3x3KernelAlignInt_CONSTANT_PADDING.json | 132 +++++++-------- ...ndom3x3KernelAlignInt_CONSTANT_PADDING.log | 48 +++--- ...om3x3KernelAlignInt_REPLICATE_PADDING.json | 128 +++++++------- ...dom3x3KernelAlignInt_REPLICATE_PADDING.log | 46 +++--- .../image-processing-result.log | 58 +++---- 49 files changed, 1215 insertions(+), 1551 deletions(-) diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml index cc2bb358..f5b43284 100755 --- a/.github/workflows/bench.yml +++ b/.github/workflows/bench.yml @@ -81,7 +81,7 @@ jobs: mkdir -p latest cat > latest/index.html < - EOF +EOF echo "[Info] benchmarks/latest now points to ../${latest}/" # ------------------------------------------------------------ @@ -93,7 +93,7 @@ jobs: set -e cat > benchmarks/index.html <<'EOF' - EOF +EOF - name: Upload site artifact uses: actions/upload-pages-artifact@v3 @@ -121,7 +121,7 @@ jobs: {% endif %} {% endfor %}
          - EOF +EOF - name: Build top-level benchmarks index (list all runs) @@ -138,7 +138,7 @@ jobs: ---

          Benchmark runs

          Select a date and commit:

          - HTML +HTML # List dates newest first for d in $(ls -1d benchmarks/20*/ | sort -r); do diff --git a/test_result/deeplearning/build_results_crosscompile_summary.log b/test_result/deeplearning/build_results_crosscompile_summary.log index 7d53f5be..ddd8a7cc 100644 --- a/test_result/deeplearning/build_results_crosscompile_summary.log +++ b/test_result/deeplearning/build_results_crosscompile_summary.log @@ -2,3 +2,53 @@ [Failed] Build of 'dl-model-mobilenetv3-benchmark' [Success] Build of 'dl-model-lenet-benchmark' [Failed] Build of 'dl-model-bert-benchmark' +[Failed] Build of 'dl-model-whisper-benchmark' +[Failed] Build of 'dl-model-resnet18-benchmark' +[Success] Build of 'dl-layer-ffn-benchmark' +[Success] Build of 'dl-layer-selfattention-benchmark' +[Success] Build of 'dl-layer-rmsnorm-benchmark' +[Failed] Build of 'dl-op-linalg-matmul-benchmark' +[Success] Build of 'dl-op-linalg-conv2d-nchw-fchw-benchmark' +[Success] Build of 'dl-op-linalg-conv2d-nhwc-hwcf-benchmark' +[Success] Build of 'dl-op-linalg-conv2d-nhwc-fhwc-benchmark' +[Success] Build of 'dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark' +[Success] Build of 'dl-op-linalg-pooling-nhwc-sum-benchmark' +[Failed] Build of 'dl-op-linalg-batch-matmul-benchmark' +[Success] Build of 'dl-op-linalg-arithaddf-benchmark' +[Success] Build of 'dl-op-linalg-arithdivf-benchmark' +[Success] Build of 'dl-op-linalg-arithmulf-benchmark' +[Success] Build of 'dl-op-linalg-arithnegf-benchmark' +[Success] Build of 'dl-op-linalg-arithsubf-benchmark' +[Success] Build of 'dl-op-linalg-mathfpow-benchmark' +[Success] Build of 'dl-op-linalg-mathrsqrt-benchmark' +[Success] Build of 'dl-op-linalg-mathexp-benchmark' +[Success] Build of 'dl-op-linalg-reduceaddf-benchmark' +[Success] Build of 'dl-op-linalg-reducemaxf-benchmark' +[Success] Build of 'dl-op-linalg-softmax-exp-sum-div-benchmark' +[Failed] Build of 'dl-op-tosa-transpose-benchmark' +[Failed] Build of 'dl-op-matmul-transpose-b-benchmark' +[Failed] Build of 
'dl-model-whisper-benchmark' +[Failed] Build of 'dl-model-resnet18-benchmark' +[Success] Build of 'dl-layer-ffn-benchmark' +[Success] Build of 'dl-layer-selfattention-benchmark' +[Success] Build of 'dl-layer-rmsnorm-benchmark' +[Failed] Build of 'dl-op-linalg-matmul-benchmark' +[Success] Build of 'dl-op-linalg-conv2d-nchw-fchw-benchmark' +[Success] Build of 'dl-op-linalg-conv2d-nhwc-hwcf-benchmark' +[Success] Build of 'dl-op-linalg-conv2d-nhwc-fhwc-benchmark' +[Success] Build of 'dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark' +[Success] Build of 'dl-op-linalg-pooling-nhwc-sum-benchmark' +[Failed] Build of 'dl-op-linalg-batch-matmul-benchmark' +[Success] Build of 'dl-op-linalg-arithaddf-benchmark' +[Success] Build of 'dl-op-linalg-arithdivf-benchmark' +[Success] Build of 'dl-op-linalg-arithmulf-benchmark' +[Success] Build of 'dl-op-linalg-arithnegf-benchmark' +[Success] Build of 'dl-op-linalg-arithsubf-benchmark' +[Success] Build of 'dl-op-linalg-mathfpow-benchmark' +[Success] Build of 'dl-op-linalg-mathrsqrt-benchmark' +[Success] Build of 'dl-op-linalg-mathexp-benchmark' +[Success] Build of 'dl-op-linalg-reduceaddf-benchmark' +[Success] Build of 'dl-op-linalg-reducemaxf-benchmark' +[Success] Build of 'dl-op-linalg-softmax-exp-sum-div-benchmark' +[Failed] Build of 'dl-op-tosa-transpose-benchmark' +[Failed] Build of 'dl-op-matmul-transpose-b-benchmark' diff --git a/test_result/deeplearning/build_results_summary.log b/test_result/deeplearning/build_results_summary.log index 90bf162d..0f7a7c2e 100644 --- a/test_result/deeplearning/build_results_summary.log +++ b/test_result/deeplearning/build_results_summary.log @@ -30,3 +30,28 @@ [Failed] Build of 'dl-model-mobilenetv3-benchmark' [Success] Build of 'dl-model-lenet-benchmark' [Failed] Build of 'dl-model-bert-benchmark' +[Failed] Build of 'dl-model-whisper-benchmark' +[Failed] Build of 'dl-model-resnet18-benchmark' +[Success] Build of 'dl-layer-ffn-benchmark' +[Success] Build of 'dl-layer-selfattention-benchmark' 
+[Success] Build of 'dl-layer-rmsnorm-benchmark' +[Failed] Build of 'dl-op-linalg-matmul-benchmark' +[Success] Build of 'dl-op-linalg-conv2d-nchw-fchw-benchmark' +[Success] Build of 'dl-op-linalg-conv2d-nhwc-hwcf-benchmark' +[Success] Build of 'dl-op-linalg-conv2d-nhwc-fhwc-benchmark' +[Success] Build of 'dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark' +[Success] Build of 'dl-op-linalg-pooling-nhwc-sum-benchmark' +[Failed] Build of 'dl-op-linalg-batch-matmul-benchmark' +[Success] Build of 'dl-op-linalg-arithaddf-benchmark' +[Success] Build of 'dl-op-linalg-arithdivf-benchmark' +[Success] Build of 'dl-op-linalg-arithmulf-benchmark' +[Success] Build of 'dl-op-linalg-arithnegf-benchmark' +[Success] Build of 'dl-op-linalg-arithsubf-benchmark' +[Success] Build of 'dl-op-linalg-mathfpow-benchmark' +[Success] Build of 'dl-op-linalg-mathrsqrt-benchmark' +[Success] Build of 'dl-op-linalg-mathexp-benchmark' +[Success] Build of 'dl-op-linalg-reduceaddf-benchmark' +[Success] Build of 'dl-op-linalg-reducemaxf-benchmark' +[Success] Build of 'dl-op-linalg-softmax-exp-sum-div-benchmark' +[Failed] Build of 'dl-op-tosa-transpose-benchmark' +[Failed] Build of 'dl-op-matmul-transpose-b-benchmark' diff --git a/test_result/deeplearning/dl-layer-ffn-benchmark.log b/test_result/deeplearning/dl-layer-ffn-benchmark.log index b20bfaa2..e69de29b 100644 --- a/test_result/deeplearning/dl-layer-ffn-benchmark.log +++ b/test_result/deeplearning/dl-layer-ffn-benchmark.log @@ -1,18 +0,0 @@ -2025-09-07T12:45:30+00:00 -Running ./dl-layer-ffn-benchmark -Run on (24 X 5100 MHz CPU s) -CPU Caches: - L1 Data 48 KiB (x12) - L1 Instruction 32 KiB (x12) - L2 Unified 1280 KiB (x12) - L3 Unified 30720 KiB (x1) -Load Average: 2.52, 3.41, 5.16 -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
--------------------------------------------------------------------------- -Benchmark Time CPU Iterations --------------------------------------------------------------------------- -DL_LAYER_FFN/Scalar 0.068 ms 0.068 ms 10218 -DL_LAYER_FFN/Auto_Vectorization 0.027 ms 0.027 ms 26193 ------------------------------------------------------------ -Correctness Verification: PASS ------------------------------------------------------------ diff --git a/test_result/deeplearning/dl-layer-rmsnorm-benchmark.log b/test_result/deeplearning/dl-layer-rmsnorm-benchmark.log index 87b65afc..e69de29b 100644 --- a/test_result/deeplearning/dl-layer-rmsnorm-benchmark.log +++ b/test_result/deeplearning/dl-layer-rmsnorm-benchmark.log @@ -1,18 +0,0 @@ -2025-09-07T12:45:34+00:00 -Running ./dl-layer-rmsnorm-benchmark -Run on (24 X 5100 MHz CPU s) -CPU Caches: - L1 Data 48 KiB (x12) - L1 Instruction 32 KiB (x12) - L2 Unified 1280 KiB (x12) - L3 Unified 30720 KiB (x1) -Load Average: 2.48, 3.39, 5.15 -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
------------------------------------------------------------------------------- -Benchmark Time CPU Iterations ------------------------------------------------------------------------------- -DL_LAYER_RMSNORM/Scalar 0.002 ms 0.002 ms 339474 -DL_LAYER_RMSNORM/Auto_Vectorization 0.001 ms 0.001 ms 780156 ------------------------------------------------------------ -Correctness Verification: PASS ------------------------------------------------------------ diff --git a/test_result/deeplearning/dl-layer-selfattention-benchmark.log b/test_result/deeplearning/dl-layer-selfattention-benchmark.log index 1db72b01..e69de29b 100644 --- a/test_result/deeplearning/dl-layer-selfattention-benchmark.log +++ b/test_result/deeplearning/dl-layer-selfattention-benchmark.log @@ -1,18 +0,0 @@ -2025-09-07T12:45:32+00:00 -Running ./dl-layer-selfattention-benchmark -Run on (24 X 5100 MHz CPU s) -CPU Caches: - L1 Data 48 KiB (x12) - L1 Instruction 32 KiB (x12) - L2 Unified 1280 KiB (x12) - L3 Unified 30720 KiB (x1) -Load Average: 2.52, 3.41, 5.16 -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
--------------------------------------------------------------------------------- -Benchmark Time CPU Iterations --------------------------------------------------------------------------------- -DL_LAYER_ATTENTION/Scalar 4.87 ms 4.87 ms 144 -DL_LAYER_ATTENTION/Auto_Vectorization 1.59 ms 1.59 ms 435 ------------------------------------------------------------ -Correctness Verification: PASS ------------------------------------------------------------ diff --git a/test_result/deeplearning/dl-model-lenet-benchmark.log b/test_result/deeplearning/dl-model-lenet-benchmark.log index ac6de3f8..e69de29b 100644 --- a/test_result/deeplearning/dl-model-lenet-benchmark.log +++ b/test_result/deeplearning/dl-model-lenet-benchmark.log @@ -1,19 +0,0 @@ -2025-09-07T12:41:48+00:00 -Running ./dl-model-lenet-benchmark -Run on (24 X 5100 MHz CPU s) -CPU Caches: - L1 Data 48 KiB (x12) - L1 Instruction 32 KiB (x12) - L2 Unified 1280 KiB (x12) - L3 Unified 30720 KiB (x1) -Load Average: 3.67, 4.13, 5.81 -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
------------------------------------------------------------------------------ -Benchmark Time CPU Iterations ------------------------------------------------------------------------------ -DL_MODEL_LENET/Auto_Vectorization 0.173 ms 0.173 ms 4111 -DL_MODEL_LENET/Buddy_Vectorization 0.144 ms 0.144 ms 4846 ------------------------------------------------------------ -Correctness Verification: -Transform case: PASS ------------------------------------------------------------ diff --git a/test_result/deeplearning/dl-op-linalg-arithaddf-benchmark.log b/test_result/deeplearning/dl-op-linalg-arithaddf-benchmark.log index 4d355935..e69de29b 100644 --- a/test_result/deeplearning/dl-op-linalg-arithaddf-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-arithaddf-benchmark.log @@ -1,19 +0,0 @@ -2025-09-07T12:46:04+00:00 -Running ./dl-op-linalg-arithaddf-benchmark -Run on (24 X 5100 MHz CPU s) -CPU Caches: - L1 Data 48 KiB (x12) - L1 Instruction 32 KiB (x12) - L2 Unified 1280 KiB (x12) - L3 Unified 30720 KiB (x1) -Load Average: 2.35, 3.27, 5.05 -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
--------------------------------------------------------------------- -Benchmark Time CPU Iterations --------------------------------------------------------------------- -BM_ADDF_SCALAR 0.031 ms 0.031 ms 22527 -BM_ADDF_AutoVectorization 0.005 ms 0.005 ms 169988 ------------------------------------------------------------ -Correctness Verification: -Transform case: PASS ------------------------------------------------------------ diff --git a/test_result/deeplearning/dl-op-linalg-arithdivf-benchmark.log b/test_result/deeplearning/dl-op-linalg-arithdivf-benchmark.log index 2e4e9d89..e69de29b 100644 --- a/test_result/deeplearning/dl-op-linalg-arithdivf-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-arithdivf-benchmark.log @@ -1,19 +0,0 @@ -2025-09-07T12:46:07+00:00 -Running ./dl-op-linalg-arithdivf-benchmark -Run on (24 X 5100 MHz CPU s) -CPU Caches: - L1 Data 48 KiB (x12) - L1 Instruction 32 KiB (x12) - L2 Unified 1280 KiB (x12) - L3 Unified 30720 KiB (x1) -Load Average: 2.35, 3.27, 5.05 -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
--------------------------------------------------------------------- -Benchmark Time CPU Iterations --------------------------------------------------------------------- -BM_DIVF_SCALAR 0.032 ms 0.032 ms 22003 -BM_DIVF_AutoVectorization 0.011 ms 0.011 ms 69823 ------------------------------------------------------------ -Correctness Verification: -Transform case: PASS ------------------------------------------------------------ diff --git a/test_result/deeplearning/dl-op-linalg-arithmulf-benchmark.log b/test_result/deeplearning/dl-op-linalg-arithmulf-benchmark.log index a6adac15..e69de29b 100644 --- a/test_result/deeplearning/dl-op-linalg-arithmulf-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-arithmulf-benchmark.log @@ -1,19 +0,0 @@ -2025-09-07T12:46:09+00:00 -Running ./dl-op-linalg-arithmulf-benchmark -Run on (24 X 5100 MHz CPU s) -CPU Caches: - L1 Data 48 KiB (x12) - L1 Instruction 32 KiB (x12) - L2 Unified 1280 KiB (x12) - L3 Unified 30720 KiB (x1) -Load Average: 2.40, 3.26, 5.04 -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
--------------------------------------------------------------------- -Benchmark Time CPU Iterations --------------------------------------------------------------------- -BM_MULF_SCALAR 0.031 ms 0.031 ms 22824 -BM_MULF_AutoVectorization 0.004 ms 0.004 ms 169993 ------------------------------------------------------------ -Correctness Verification: -Transform case: PASS ------------------------------------------------------------ diff --git a/test_result/deeplearning/dl-op-linalg-arithnegf-benchmark.log b/test_result/deeplearning/dl-op-linalg-arithnegf-benchmark.log index d6e48286..e69de29b 100644 --- a/test_result/deeplearning/dl-op-linalg-arithnegf-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-arithnegf-benchmark.log @@ -1,19 +0,0 @@ -2025-09-07T12:46:11+00:00 -Running ./dl-op-linalg-arithnegf-benchmark -Run on (24 X 5100 MHz CPU s) -CPU Caches: - L1 Data 48 KiB (x12) - L1 Instruction 32 KiB (x12) - L2 Unified 1280 KiB (x12) - L3 Unified 30720 KiB (x1) -Load Average: 2.40, 3.26, 5.04 -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
--------------------------------------------------------------------- -Benchmark Time CPU Iterations --------------------------------------------------------------------- -BM_NEGF_SCALAR 0.024 ms 0.024 ms 29588 -BM_NEGF_AutoVectorization 0.003 ms 0.003 ms 237464 ------------------------------------------------------------ -Correctness Verification: -Transform case: PASS ------------------------------------------------------------ diff --git a/test_result/deeplearning/dl-op-linalg-arithsubf-benchmark.log b/test_result/deeplearning/dl-op-linalg-arithsubf-benchmark.log index df9b7125..e69de29b 100644 --- a/test_result/deeplearning/dl-op-linalg-arithsubf-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-arithsubf-benchmark.log @@ -1,19 +0,0 @@ -2025-09-07T12:46:13+00:00 -Running ./dl-op-linalg-arithsubf-benchmark -Run on (24 X 5100 MHz CPU s) -CPU Caches: - L1 Data 48 KiB (x12) - L1 Instruction 32 KiB (x12) - L2 Unified 1280 KiB (x12) - L3 Unified 30720 KiB (x1) -Load Average: 2.40, 3.26, 5.04 -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
--------------------------------------------------------------------- -Benchmark Time CPU Iterations --------------------------------------------------------------------- -BM_SUBF_SCALAR 0.031 ms 0.031 ms 22687 -BM_SUBF_AutoVectorization 0.004 ms 0.004 ms 170328 ------------------------------------------------------------ -Correctness Verification: -Transform case: PASS ------------------------------------------------------------ diff --git a/test_result/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.log b/test_result/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.log index fc332eee..e69de29b 100644 --- a/test_result/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.log @@ -1,19 +0,0 @@ -2025-09-07T12:45:47+00:00 -Running ./dl-op-linalg-conv2d-nchw-fchw-benchmark -Run on (24 X 5100 MHz CPU s) -CPU Caches: - L1 Data 48 KiB (x12) - L1 Instruction 32 KiB (x12) - L2 Unified 1280 KiB (x12) - L3 Unified 30720 KiB (x1) -Load Average: 2.48, 3.36, 5.12 -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
-------------------------------------------------------------------- -Benchmark Time CPU Iterations -------------------------------------------------------------------- -BM_Conv2DNchwFchw_SCALAR 291 ms 291 ms 2 -BM_Conv2DNchwFchw_Im2col 8.56 ms 8.56 ms 72 ------------------------------------------------------------ -Correctness Verification: -Transform case: PASS ------------------------------------------------------------ diff --git a/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.log b/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.log index a49002a9..e69de29b 100644 --- a/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.log @@ -1,21 +0,0 @@ -2025-09-07T12:45:51+00:00 -Running ./dl-op-linalg-conv2d-nhwc-fhwc-benchmark -Run on (24 X 5100 MHz CPU s) -CPU Caches: - L1 Data 48 KiB (x12) - L1 Instruction 32 KiB (x12) - L2 Unified 1280 KiB (x12) - L3 Unified 30720 KiB (x1) -Load Average: 2.44, 3.33, 5.10 -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
---------------------------------------------------------------------------------------------------- -Benchmark Time CPU Iterations ---------------------------------------------------------------------------------------------------- -DL_OPS_CONV_2D_NHWC_FHWC/scalar/iterations:5 73.9 ms 73.9 ms 5 -DL_OPS_CONV_2D_NHWC_FHWC/auto_vectorization/iterations:5 9.73 ms 9.73 ms 5 -DL_OPS_CONV_2D_NHWC_FHWC/vectorization/iterations:5 1.82 ms 1.82 ms 5 -DL_OPS_CONV_2D_NHWC_FHWC/vec_tile/iterations:5 1.78 ms 1.78 ms 5 ----------- Verification ---------- -auto_vectorization PASS -vectorization PASS -vec_tile PASS diff --git a/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.log b/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.log index ff8e3c98..e69de29b 100644 --- a/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.log @@ -1,19 +0,0 @@ -2025-09-07T12:45:49+00:00 -Running ./dl-op-linalg-conv2d-nhwc-hwcf-benchmark -Run on (24 X 5100 MHz CPU s) -CPU Caches: - L1 Data 48 KiB (x12) - L1 Instruction 32 KiB (x12) - L2 Unified 1280 KiB (x12) - L3 Unified 30720 KiB (x1) -Load Average: 2.44, 3.33, 5.10 -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
---------------------------------------------------------------------------------- -Benchmark Time CPU Iterations ---------------------------------------------------------------------------------- -BM_CONV_2D_NHWC_HWCF_SCALAR 33.4 ms 33.4 ms 21 -BM_CONV_2D_NHWC_HWCF_AutoVectorization 6.29 ms 6.29 ms 110 ------------------------------------------------------------ -Correctness Verification: -Transform case: PASS ------------------------------------------------------------ diff --git a/test_result/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.log b/test_result/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.log index 82895553..5e616453 100644 --- a/test_result/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.log @@ -1,19 +1 @@ -2025-09-07T12:45:52+00:00 -Running ./dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark -Run on (24 X 5100 MHz CPU s) -CPU Caches: - L1 Data 48 KiB (x12) - L1 Instruction 32 KiB (x12) - L2 Unified 1280 KiB (x12) - L3 Unified 30720 KiB (x1) -Load Average: 2.44, 3.33, 5.10 -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
------------------------------------------------------------------------------------------------------------- -Benchmark Time CPU Iterations ------------------------------------------------------------------------------------------------------------- -DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/scalar/iterations:5 4.31 ms 4.31 ms 5 -DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/auto_vectorization/iterations:5 1.72 ms 1.72 ms 5 -DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/vectorization/iterations:5 0.128 ms 0.128 ms 5 ----------- Verification ---------- -auto_vectorization PASS -vectorization PASS +qemu-riscv64-static: Could not open '/lib/ld-linux-riscv64-lp64d.so.1': No such file or directory diff --git a/test_result/deeplearning/dl-op-linalg-mathexp-benchmark.log b/test_result/deeplearning/dl-op-linalg-mathexp-benchmark.log index 0a0532c4..e69de29b 100644 --- a/test_result/deeplearning/dl-op-linalg-mathexp-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-mathexp-benchmark.log @@ -1,19 +0,0 @@ -2025-09-07T12:46:19+00:00 -Running ./dl-op-linalg-mathexp-benchmark -Run on (24 X 5100 MHz CPU s) -CPU Caches: - L1 Data 48 KiB (x12) - L1 Instruction 32 KiB (x12) - L2 Unified 1280 KiB (x12) - L3 Unified 30720 KiB (x1) -Load Average: 2.49, 3.25, 5.02 -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
-------------------------------------------------------------------- -Benchmark Time CPU Iterations -------------------------------------------------------------------- -BM_EXP_SCALAR 0.047 ms 0.047 ms 14801 -BM_EXP_AutoVectorization 0.033 ms 0.033 ms 21304 ------------------------------------------------------------ -Correctness Verification: -Transform case: PASS ------------------------------------------------------------ diff --git a/test_result/deeplearning/dl-op-linalg-mathfpow-benchmark.log b/test_result/deeplearning/dl-op-linalg-mathfpow-benchmark.log index e763d9ba..e69de29b 100644 --- a/test_result/deeplearning/dl-op-linalg-mathfpow-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-mathfpow-benchmark.log @@ -1,19 +0,0 @@ -2025-09-07T12:46:15+00:00 -Running ./dl-op-linalg-mathfpow-benchmark -Run on (24 X 5100 MHz CPU s) -CPU Caches: - L1 Data 48 KiB (x12) - L1 Instruction 32 KiB (x12) - L2 Unified 1280 KiB (x12) - L3 Unified 30720 KiB (x1) -Load Average: 2.45, 3.26, 5.03 -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
--------------------------------------------------------------------- -Benchmark Time CPU Iterations --------------------------------------------------------------------- -BM_FPOW_SCALAR 0.086 ms 0.086 ms 8174 -BM_FPOW_AutoVectorization 0.059 ms 0.059 ms 11919 ------------------------------------------------------------ -Correctness Verification: -Transform case: PASS ------------------------------------------------------------ diff --git a/test_result/deeplearning/dl-op-linalg-mathrsqrt-benchmark.log b/test_result/deeplearning/dl-op-linalg-mathrsqrt-benchmark.log index 5079264e..e69de29b 100644 --- a/test_result/deeplearning/dl-op-linalg-mathrsqrt-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-mathrsqrt-benchmark.log @@ -1,19 +0,0 @@ -2025-09-07T12:46:17+00:00 -Running ./dl-op-linalg-mathrsqrt-benchmark -Run on (24 X 5100 MHz CPU s) -CPU Caches: - L1 Data 48 KiB (x12) - L1 Instruction 32 KiB (x12) - L2 Unified 1280 KiB (x12) - L3 Unified 30720 KiB (x1) -Load Average: 2.45, 3.26, 5.03 -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
---------------------------------------------------------------------- -Benchmark Time CPU Iterations ---------------------------------------------------------------------- -BM_RSQRT_SCALAR 0.075 ms 0.075 ms 9351 -BM_RSQRT_AutoVectorization 0.004 ms 0.004 ms 155807 ------------------------------------------------------------ -Correctness Verification: -Transform case: PASS ------------------------------------------------------------ diff --git a/test_result/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.log b/test_result/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.log index 43ece648..e69de29b 100644 --- a/test_result/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.log @@ -1,19 +0,0 @@ -2025-09-07T12:45:52+00:00 -Running ./dl-op-linalg-pooling-nhwc-sum-benchmark -Run on (24 X 5100 MHz CPU s) -CPU Caches: - L1 Data 48 KiB (x12) - L1 Instruction 32 KiB (x12) - L2 Unified 1280 KiB (x12) - L3 Unified 30720 KiB (x1) -Load Average: 2.44, 3.33, 5.10 -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
--------------------------------------------------------------------------------- -Benchmark Time CPU Iterations --------------------------------------------------------------------------------- -BM_POOLING_NHWC_SUM_SCALAR 0.240 ms 0.240 ms 2922 -BM_POOLING_NHWC_SUM_AutoVectorization 0.043 ms 0.043 ms 16330 ------------------------------------------------------------ -Correctness Verification: -Transform case: PASS ------------------------------------------------------------ diff --git a/test_result/deeplearning/dl-op-linalg-reduceaddf-benchmark.log b/test_result/deeplearning/dl-op-linalg-reduceaddf-benchmark.log index 63793e54..e69de29b 100644 --- a/test_result/deeplearning/dl-op-linalg-reduceaddf-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-reduceaddf-benchmark.log @@ -1,10 +0,0 @@ -2025-09-07T12:46:21+00:00 -Running ./dl-op-linalg-reduceaddf-benchmark -Run on (24 X 5100 MHz CPU s) -CPU Caches: - L1 Data 48 KiB (x12) - L1 Instruction 32 KiB (x12) - L2 Unified 1280 KiB (x12) - L3 Unified 30720 KiB (x1) -Load Average: 2.49, 3.25, 5.02 -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. diff --git a/test_result/deeplearning/dl-op-linalg-reducemaxf-benchmark.log b/test_result/deeplearning/dl-op-linalg-reducemaxf-benchmark.log index c7f97958..e69de29b 100644 --- a/test_result/deeplearning/dl-op-linalg-reducemaxf-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-reducemaxf-benchmark.log @@ -1,10 +0,0 @@ -2025-09-07T12:46:21+00:00 -Running ./dl-op-linalg-reducemaxf-benchmark -Run on (24 X 5100 MHz CPU s) -CPU Caches: - L1 Data 48 KiB (x12) - L1 Instruction 32 KiB (x12) - L2 Unified 1280 KiB (x12) - L3 Unified 30720 KiB (x1) -Load Average: 2.49, 3.25, 5.02 -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
diff --git a/test_result/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.log b/test_result/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.log index cfa81168..e69de29b 100644 --- a/test_result/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.log +++ b/test_result/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.log @@ -1,19 +0,0 @@ -2025-09-07T12:46:21+00:00 -Running ./dl-op-linalg-softmax-exp-sum-div-benchmark -Run on (24 X 5100 MHz CPU s) -CPU Caches: - L1 Data 48 KiB (x12) - L1 Instruction 32 KiB (x12) - L2 Unified 1280 KiB (x12) - L3 Unified 30720 KiB (x1) -Load Average: 2.49, 3.25, 5.02 -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. --------------------------------------------------------------------------------- -Benchmark Time CPU Iterations --------------------------------------------------------------------------------- -BM_SOFTMAXEXPSUMDIV_SCALAR 0.006 ms 0.006 ms 120007 -BM_SOFTMAXEXPSUMDIV_AutoVectorization 0.004 ms 0.004 ms 176914 ------------------------------------------------------------ -Correctness Verification: -Transform case: PASS ------------------------------------------------------------ diff --git a/test_result/deeplearning/run_results_summary.log b/test_result/deeplearning/run_results_summary.log index e39e6015..309ac991 100644 --- a/test_result/deeplearning/run_results_summary.log +++ b/test_result/deeplearning/run_results_summary.log @@ -1,29 +1,49 @@ [Missing] Executable not found for 'dl-model-tinyllama-benchmark' [Missing] Executable not found for 'dl-model-mobilenetv3-benchmark' -[Missing] Executable not found for 'dl-model-lenet-benchmark' +[Failed] Run of 'dl-model-lenet-benchmark' + ↳ stdout/stderr → dl-model-lenet-benchmark.log (may contain errors) [Missing] Executable not found for 'dl-model-bert-benchmark' [Missing] Executable not found for 'dl-model-whisper-benchmark' [Missing] Executable not found for 
'dl-model-resnet18-benchmark' -[Missing] Executable not found for 'dl-layer-ffn-benchmark' -[Missing] Executable not found for 'dl-layer-selfattention-benchmark' -[Missing] Executable not found for 'dl-layer-rmsnorm-benchmark' +[Failed] Run of 'dl-layer-ffn-benchmark' + ↳ stdout/stderr → dl-layer-ffn-benchmark.log (may contain errors) +[Failed] Run of 'dl-layer-selfattention-benchmark' + ↳ stdout/stderr → dl-layer-selfattention-benchmark.log (may contain errors) +[Failed] Run of 'dl-layer-rmsnorm-benchmark' + ↳ stdout/stderr → dl-layer-rmsnorm-benchmark.log (may contain errors) [Missing] Executable not found for 'dl-op-linalg-matmul-benchmark' -[Missing] Executable not found for 'dl-op-linalg-conv2d-nchw-fchw-benchmark' -[Missing] Executable not found for 'dl-op-linalg-conv2d-nhwc-hwcf-benchmark' -[Missing] Executable not found for 'dl-op-linalg-conv2d-nhwc-fhwc-benchmark' -[Missing] Executable not found for 'dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark' -[Missing] Executable not found for 'dl-op-linalg-pooling-nhwc-sum-benchmark' +[Failed] Run of 'dl-op-linalg-conv2d-nchw-fchw-benchmark' + ↳ stdout/stderr → dl-op-linalg-conv2d-nchw-fchw-benchmark.log (may contain errors) +[Failed] Run of 'dl-op-linalg-conv2d-nhwc-hwcf-benchmark' + ↳ stdout/stderr → dl-op-linalg-conv2d-nhwc-hwcf-benchmark.log (may contain errors) +[Failed] Run of 'dl-op-linalg-conv2d-nhwc-fhwc-benchmark' + ↳ stdout/stderr → dl-op-linalg-conv2d-nhwc-fhwc-benchmark.log (may contain errors) +[Failed] Run of 'dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark' + ↳ stdout/stderr → dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.log (may contain errors) +[Failed] Run of 'dl-op-linalg-pooling-nhwc-sum-benchmark' + ↳ stdout/stderr → dl-op-linalg-pooling-nhwc-sum-benchmark.log (may contain errors) [Missing] Executable not found for 'dl-op-linalg-batch-matmul-benchmark' -[Missing] Executable not found for 'dl-op-linalg-arithaddf-benchmark' -[Missing] Executable not found for 
'dl-op-linalg-arithdivf-benchmark' -[Missing] Executable not found for 'dl-op-linalg-arithmulf-benchmark' -[Missing] Executable not found for 'dl-op-linalg-arithnegf-benchmark' -[Missing] Executable not found for 'dl-op-linalg-arithsubf-benchmark' -[Missing] Executable not found for 'dl-op-linalg-mathfpow-benchmark' -[Missing] Executable not found for 'dl-op-linalg-mathrsqrt-benchmark' -[Missing] Executable not found for 'dl-op-linalg-mathexp-benchmark' -[Missing] Executable not found for 'dl-op-linalg-reduceaddf-benchmark' -[Missing] Executable not found for 'dl-op-linalg-reducemaxf-benchmark' -[Missing] Executable not found for 'dl-op-linalg-softmax-exp-sum-div-benchmark' +[Failed] Run of 'dl-op-linalg-arithaddf-benchmark' + ↳ stdout/stderr → dl-op-linalg-arithaddf-benchmark.log (may contain errors) +[Failed] Run of 'dl-op-linalg-arithdivf-benchmark' + ↳ stdout/stderr → dl-op-linalg-arithdivf-benchmark.log (may contain errors) +[Failed] Run of 'dl-op-linalg-arithmulf-benchmark' + ↳ stdout/stderr → dl-op-linalg-arithmulf-benchmark.log (may contain errors) +[Failed] Run of 'dl-op-linalg-arithnegf-benchmark' + ↳ stdout/stderr → dl-op-linalg-arithnegf-benchmark.log (may contain errors) +[Failed] Run of 'dl-op-linalg-arithsubf-benchmark' + ↳ stdout/stderr → dl-op-linalg-arithsubf-benchmark.log (may contain errors) +[Failed] Run of 'dl-op-linalg-mathfpow-benchmark' + ↳ stdout/stderr → dl-op-linalg-mathfpow-benchmark.log (may contain errors) +[Failed] Run of 'dl-op-linalg-mathrsqrt-benchmark' + ↳ stdout/stderr → dl-op-linalg-mathrsqrt-benchmark.log (may contain errors) +[Failed] Run of 'dl-op-linalg-mathexp-benchmark' + ↳ stdout/stderr → dl-op-linalg-mathexp-benchmark.log (may contain errors) +[Failed] Run of 'dl-op-linalg-reduceaddf-benchmark' + ↳ stdout/stderr → dl-op-linalg-reduceaddf-benchmark.log (may contain errors) +[Failed] Run of 'dl-op-linalg-reducemaxf-benchmark' + ↳ stdout/stderr → dl-op-linalg-reducemaxf-benchmark.log (may contain errors) +[Failed] Run of 
'dl-op-linalg-softmax-exp-sum-div-benchmark' + ↳ stdout/stderr → dl-op-linalg-softmax-exp-sum-div-benchmark.log (may contain errors) [Missing] Executable not found for 'dl-op-tosa-transpose-benchmark' [Missing] Executable not found for 'dl-op-matmul-transpose-b-benchmark' diff --git a/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json index fbe2c35c..538ed5ff 100644 --- a/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json +++ b/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-09-07T13:06:58+00:00", + "date": "2025-09-07T14:23:19+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [2.79346,4.37109,6.12695], + "load_avg": [3.03271,3.75732,4.84424], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 140, - "real_time": 5.0267200917005539e+00, - "cpu_time": 5.0265640428571432e+00, + "iterations": 136, + "real_time": 5.1130582030643437e+00, + "cpu_time": 5.1129978750000005e+00, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 91, - "real_time": 7.6496493849125535e+00, - "cpu_time": 7.6493698021978016e+00, + "iterations": 92, + "real_time": 7.5614458840826284e+00, + "cpu_time": 7.5612571630434795e+00, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1044, - "real_time": 7.0535207802422661e-01, - "cpu_time": 7.0532346839080440e-01, + "iterations": 991, + "real_time": 7.3911826286979931e-01, + "cpu_time": 7.3911208072653889e-01, "time_unit": "ms" }, 
{ @@ -87,9 +87,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 603, - "real_time": 1.1493037354392595e+00, - "cpu_time": 1.1492314079601984e+00, + "iterations": 634, + "real_time": 1.0977113554812381e+00, + "cpu_time": 1.0976892066246056e+00, "time_unit": "ms" }, { @@ -101,9 +101,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 351, - "real_time": 1.9643567396365000e+00, - "cpu_time": 1.9642499658119676e+00, + "iterations": 359, + "real_time": 1.9464333305617894e+00, + "cpu_time": 1.9463996295264627e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4666, - "real_time": 1.4894739499150353e-01, - "cpu_time": 1.4894085683669100e-01, + "iterations": 4731, + "real_time": 1.4831822525368915e-01, + "cpu_time": 1.4831527478334391e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2568, - "real_time": 2.7369540561907391e-01, - "cpu_time": 2.7368365965732100e-01, + "iterations": 2579, + "real_time": 2.7571449409440857e-01, + "cpu_time": 2.7571217332299330e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 100789, - "real_time": 6.8543536302857408e-03, - "cpu_time": 6.8541224836043569e-03, + "iterations": 101112, + "real_time": 6.9151851410107082e-03, + "cpu_time": 6.9149452488329738e-03, "time_unit": "ms" }, { @@ -157,9 +157,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 48298, - "real_time": 1.4474783446770128e-02, - "cpu_time": 1.4474468797879825e-02, + "iterations": 47882, + "real_time": 1.4606120489572882e-02, + "cpu_time": 1.4605831794828930e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2759, - "real_time": 2.4659659722958324e-01, - "cpu_time": 2.4658607828923515e-01, + "iterations": 2267, + "real_time": 
2.9327621150237643e-01, + "cpu_time": 2.9326395059550070e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2891, - "real_time": 2.4580984498464042e-01, - "cpu_time": 2.4580015496368032e-01, + "iterations": 2325, + "real_time": 2.8085091902363685e-01, + "cpu_time": 2.8084884645161295e-01, "time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1723, - "real_time": 4.0453681489858112e-01, - "cpu_time": 4.0451324260011656e-01, + "iterations": 1277, + "real_time": 4.9499843850113401e-01, + "cpu_time": 4.9497863508222334e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1732, - "real_time": 4.1367237694888415e-01, - "cpu_time": 4.1365702655889142e-01, + "iterations": 1822, + "real_time": 3.4349706150697695e-01, + "cpu_time": 3.4349251042810131e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 638, - "real_time": 1.0285404420291369e+00, - "cpu_time": 1.0285238934169287e+00, + "iterations": 705, + "real_time": 9.3753048291443086e-01, + "cpu_time": 9.3750746524822892e-01, "time_unit": "ms" }, { @@ -241,9 +241,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 644, - "real_time": 1.0358063990007276e+00, - "cpu_time": 1.0357866428571438e+00, + "iterations": 717, + "real_time": 9.3463689836996866e-01, + "cpu_time": 9.3461899302649698e-01, "time_unit": "ms" }, { @@ -255,9 +255,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4822, - "real_time": 1.4633293893637869e-01, - "cpu_time": 1.4632941041061789e-01, + "iterations": 4954, + "real_time": 1.4094130591394249e-01, + "cpu_time": 1.4093779975777146e-01, "time_unit": "ms" }, { @@ -269,9 +269,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2981, - "real_time": 2.2754417554278855e-01, - 
"cpu_time": 2.2754064776920505e-01, + "iterations": 3148, + "real_time": 2.2180359976264391e-01, + "cpu_time": 2.2180045520965658e-01, "time_unit": "ms" }, { @@ -283,9 +283,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3123, - "real_time": 2.2576934456100001e-01, - "cpu_time": 2.2575589721421732e-01, + "iterations": 3189, + "real_time": 2.1949756099685078e-01, + "cpu_time": 2.1949316149263115e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2693, - "real_time": 2.6050619877849951e-01, - "cpu_time": 2.6050195915336039e-01, + "iterations": 2736, + "real_time": 2.5490349926577327e-01, + "cpu_time": 2.5490010709064376e-01, "time_unit": "ms" }, { @@ -311,9 +311,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2699, - "real_time": 2.5437106994312308e-01, - "cpu_time": 2.5436423193775537e-01, + "iterations": 2737, + "real_time": 2.5427932852717877e-01, + "cpu_time": 2.5427474059188909e-01, "time_unit": "ms" }, { @@ -325,9 +325,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2677, - "real_time": 2.6176411200691879e-01, - "cpu_time": 2.6176175046694000e-01, + "iterations": 2679, + "real_time": 2.6234736879533155e-01, + "cpu_time": 2.6234529749906660e-01, "time_unit": "ms" }, { @@ -339,9 +339,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4871, - "real_time": 1.4305631132053415e-01, - "cpu_time": 1.4305312009854276e-01, + "iterations": 4852, + "real_time": 1.4455454318069369e-01, + "cpu_time": 1.4455208182192936e-01, "time_unit": "ms" } ] diff --git a/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log b/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log index 82b87d75..bbdcfd5a 100644 --- a/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log 
+++ b/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log @@ -1,4 +1,4 @@ -2025-09-07T13:06:58+00:00 +2025-09-07T14:23:19+00:00 Running ./bin/image-processing-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,33 +6,33 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 2.79, 4.37, 6.13 +Load Average: 3.03, 3.76, 4.84 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. -------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------------------- -Eigen_Convolve2D/1 5.03 ms 5.03 ms 140 -MLIR_Conv2D/1 7.65 ms 7.65 ms 91 -Buddy_Conv2D/1 0.705 ms 0.705 ms 1044 -Buddy_Corr2D_Constant_Padding/1 1.15 ms 1.15 ms 603 -OpenCV_Filter2D_Constant_Padding/1 1.96 ms 1.96 ms 351 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.149 ms 0.149 ms 4666 -Buddy_Resize2D_Bilinear_Interpolation/1 0.274 ms 0.274 ms 2568 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 100789 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 48298 -Buddy_Erosion2D_Constant_Padding/1 0.247 ms 0.247 ms 2759 -Buddy_Dilation2D_Constant_Padding/1 0.246 ms 0.246 ms 2891 -Buddy_Opening2D_Constant_Padding/1 0.405 ms 0.405 ms 1723 -Buddy_Closing2D_Constant_Padding/1 0.414 ms 0.414 ms 1732 -Buddy_TopHat2D_Constant_Padding/1 1.03 ms 1.03 ms 638 -Buddy_BottomHat2D_Constant_Padding/1 1.04 ms 1.04 ms 644 -OpenCV_Erode2D_Constant_Padding/1 0.146 ms 0.146 ms 4822 -OpenCV_Opening2D_Constant_Padding/1 0.228 ms 0.228 ms 2981 -OpenCV_Closing2D_Constant_Padding/1 0.226 ms 0.226 ms 3123 -OpenCV_TopHat2D_Constant_Padding/1 0.261 ms 0.261 ms 2693 -OpenCV_BottomHat2D_Constant_Padding/1 0.254 ms 0.254 ms 2699 -OpenCV_MorphGrad2D_Constant_Padding/1 0.262 ms 0.262 ms 2677 
-OpenCV_Dilate2D_Constant_Padding/1 0.143 ms 0.143 ms 4871 +Eigen_Convolve2D/1 5.11 ms 5.11 ms 136 +MLIR_Conv2D/1 7.56 ms 7.56 ms 92 +Buddy_Conv2D/1 0.739 ms 0.739 ms 991 +Buddy_Corr2D_Constant_Padding/1 1.10 ms 1.10 ms 634 +OpenCV_Filter2D_Constant_Padding/1 1.95 ms 1.95 ms 359 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.148 ms 0.148 ms 4731 +Buddy_Resize2D_Bilinear_Interpolation/1 0.276 ms 0.276 ms 2579 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 101112 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47882 +Buddy_Erosion2D_Constant_Padding/1 0.293 ms 0.293 ms 2267 +Buddy_Dilation2D_Constant_Padding/1 0.281 ms 0.281 ms 2325 +Buddy_Opening2D_Constant_Padding/1 0.495 ms 0.495 ms 1277 +Buddy_Closing2D_Constant_Padding/1 0.343 ms 0.343 ms 1822 +Buddy_TopHat2D_Constant_Padding/1 0.938 ms 0.938 ms 705 +Buddy_BottomHat2D_Constant_Padding/1 0.935 ms 0.935 ms 717 +OpenCV_Erode2D_Constant_Padding/1 0.141 ms 0.141 ms 4954 +OpenCV_Opening2D_Constant_Padding/1 0.222 ms 0.222 ms 3148 +OpenCV_Closing2D_Constant_Padding/1 0.219 ms 0.219 ms 3189 +OpenCV_TopHat2D_Constant_Padding/1 0.255 ms 0.255 ms 2736 +OpenCV_BottomHat2D_Constant_Padding/1 0.254 ms 0.254 ms 2737 +OpenCV_MorphGrad2D_Constant_Padding/1 0.262 ms 0.262 ms 2679 +OpenCV_Dilate2D_Constant_Padding/1 0.145 ms 0.145 ms 4852 Saved PNG file. Saved PNG file. Saved PNG file. 
diff --git a/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json index e07a24a2..f80f74e1 100644 --- a/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json +++ b/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-09-07T13:07:22+00:00", + "date": "2025-09-07T14:23:42+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [2.99609,4.29199,6.05322], + "load_avg": [3.10303,3.72461,4.80908], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 139, - "real_time": 4.9621713140027985e+00, - "cpu_time": 4.9619625107913672e+00, + "iterations": 140, + "real_time": 4.9881702821169585e+00, + "cpu_time": 4.9880098642857140e+00, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 93, - "real_time": 7.5804082456455433e+00, - "cpu_time": 7.5800841397849492e+00, + "iterations": 92, + "real_time": 7.5616192315583648e+00, + "cpu_time": 7.5614974891304341e+00, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1031, - "real_time": 6.8261508335775822e-01, - "cpu_time": 6.8259750145489806e-01, + "iterations": 938, + "real_time": 7.0375580984010877e-01, + "cpu_time": 7.0372593390191918e-01, "time_unit": "ms" }, { @@ -87,9 +87,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 633, - "real_time": 1.1063351650467792e+00, - "cpu_time": 1.1062885023696678e+00, + "iterations": 634, + "real_time": 1.0931943234773089e+00, + "cpu_time": 
1.0931488375394325e+00, "time_unit": "ms" }, { @@ -101,9 +101,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 357, - "real_time": 1.9599260861466246e+00, - "cpu_time": 1.9598406246498592e+00, + "iterations": 360, + "real_time": 1.9437930236260097e+00, + "cpu_time": 1.9436920166666676e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4635, - "real_time": 1.4986117664792906e-01, - "cpu_time": 1.4983970463861931e-01, + "iterations": 4731, + "real_time": 1.4828980958025845e-01, + "cpu_time": 1.4828513654618480e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2549, - "real_time": 2.7376641481004071e-01, - "cpu_time": 2.7375588662220468e-01, + "iterations": 2575, + "real_time": 2.7519861905320175e-01, + "cpu_time": 2.7518627728155343e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 102375, - "real_time": 7.3848935128422739e-03, - "cpu_time": 7.3845735286935292e-03, + "iterations": 100987, + "real_time": 6.9202101019204187e-03, + "cpu_time": 6.9199823739689269e-03, "time_unit": "ms" }, { @@ -157,9 +157,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 48287, - "real_time": 1.4458113028899603e-02, - "cpu_time": 1.4457668357943133e-02, + "iterations": 47878, + "real_time": 1.4626334239538314e-02, + "cpu_time": 1.4625822966707055e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2811, - "real_time": 2.5138499600231667e-01, - "cpu_time": 2.5137254215581684e-01, + "iterations": 2361, + "real_time": 2.8520150363369545e-01, + "cpu_time": 2.8518143879711966e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2642, - "real_time": 2.6127819555623344e-01, - "cpu_time": 
2.6126649242997735e-01, + "iterations": 2362, + "real_time": 3.0680066550262491e-01, + "cpu_time": 3.0677437510584199e-01, "time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1671, - "real_time": 4.1013477868408876e-01, - "cpu_time": 4.1010871035308216e-01, + "iterations": 1000, + "real_time": 6.1990195512771606e-01, + "cpu_time": 6.1985857099999997e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1687, - "real_time": 4.3260783981697476e-01, - "cpu_time": 4.3259696087729665e-01, + "iterations": 1712, + "real_time": 3.8910013945581756e-01, + "cpu_time": 3.8908160338785042e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 632, - "real_time": 1.0608510892319529e+00, - "cpu_time": 1.0608173591772148e+00, + "iterations": 698, + "real_time": 9.5385507304210715e-01, + "cpu_time": 9.5382185100286465e-01, "time_unit": "ms" }, { @@ -241,9 +241,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 650, - "real_time": 1.0382979993636792e+00, - "cpu_time": 1.0382782784615385e+00, + "iterations": 712, + "real_time": 9.5589765546362049e-01, + "cpu_time": 9.5589013202247197e-01, "time_unit": "ms" }, { @@ -255,9 +255,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4927, - "real_time": 1.4230332964691400e-01, - "cpu_time": 1.4229967972397004e-01, + "iterations": 4941, + "real_time": 1.4135648417849214e-01, + "cpu_time": 1.4134417870876342e-01, "time_unit": "ms" }, { @@ -269,11 +269,11 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3177, - "real_time": 2.2380305398437336e-01, - "cpu_time": 2.2379998048473357e-01, + "iterations": 3094, + "real_time": 2.2559705339059108e-01, + "cpu_time": 2.2558599385908198e-01, "time_unit": "ms" - }, + } { "name": "OpenCV_Closing2D_Constant_Padding/1", "family_index": 17, @@ 
-283,9 +283,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3189, - "real_time": 2.2100701731886374e-01, - "cpu_time": 2.2100314989024730e-01, + "iterations": 3186, + "real_time": 2.1989222520077220e-01, + "cpu_time": 2.1989068173258070e-01, "time_unit": "ms" }, { @@ -298,8 +298,8 @@ "repetition_index": 0, "threads": 1, "iterations": 2722, - "real_time": 2.5795674680962799e-01, - "cpu_time": 2.5794806943423992e-01, + "real_time": 2.6317579229687349e-01, + "cpu_time": 2.6317380529022705e-01, "time_unit": "ms" }, { @@ -311,38 +311,8 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2649, - "real_time": 2.6164399358001283e-01, - "cpu_time": 2.6163493733484300e-01, - "time_unit": "ms" - }, - { - "name": "OpenCV_MorphGrad2D_Constant_Padding/1", - "family_index": 20, - "per_family_instance_index": 0, - "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1", - "run_type": "iteration", - "repetitions": 1, - "repetition_index": 0, - "threads": 1, - "iterations": 2636, - "real_time": 2.6538427240796081e-01, - "cpu_time": 2.6537967336874019e-01, + "iterations": 2637, + "real_time": 2.6459206470631269e-01, + "cpu_time": 2.6458999696624969e-01, "time_unit": "ms" - }, - { - "name": "OpenCV_Dilate2D_Constant_Padding/1", - "family_index": 21, - "per_family_instance_index": 0, - "run_name": "OpenCV_Dilate2D_Constant_Padding/1", - "run_type": "iteration", - "repetitions": 1, - "repetition_index": 0, - "threads": 1, - "iterations": 4833, - "real_time": 1.4401911400426576e-01, - "cpu_time": 1.4401146348023988e-01, - "time_unit": "ms" - } - ] -} + } \ No newline at end of file diff --git a/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log b/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log index 8d5da69b..919ab5bf 100644 --- a/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log 
+++ b/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log @@ -1,4 +1,4 @@ -2025-09-07T13:07:22+00:00 +2025-09-07T14:23:42+00:00 Running ./bin/image-processing-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,51 +6,28 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 3.00, 4.29, 6.05 +Load Average: 3.10, 3.72, 4.81 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. -------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------------------- -Eigen_Convolve2D/1 4.96 ms 4.96 ms 139 -MLIR_Conv2D/1 7.58 ms 7.58 ms 93 -Buddy_Conv2D/1 0.683 ms 0.683 ms 1031 -Buddy_Corr2D_Constant_Padding/1 1.11 ms 1.11 ms 633 -OpenCV_Filter2D_Constant_Padding/1 1.96 ms 1.96 ms 357 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.150 ms 0.150 ms 4635 -Buddy_Resize2D_Bilinear_Interpolation/1 0.274 ms 0.274 ms 2549 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 102375 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 48287 -Buddy_Erosion2D_Constant_Padding/1 0.251 ms 0.251 ms 2811 -Buddy_Dilation2D_Constant_Padding/1 0.261 ms 0.261 ms 2642 -Buddy_Opening2D_Constant_Padding/1 0.410 ms 0.410 ms 1671 -Buddy_Closing2D_Constant_Padding/1 0.433 ms 0.433 ms 1687 -Buddy_TopHat2D_Constant_Padding/1 1.06 ms 1.06 ms 632 -Buddy_BottomHat2D_Constant_Padding/1 1.04 ms 1.04 ms 650 -OpenCV_Erode2D_Constant_Padding/1 0.142 ms 0.142 ms 4927 -OpenCV_Opening2D_Constant_Padding/1 0.224 ms 0.224 ms 3177 -OpenCV_Closing2D_Constant_Padding/1 0.221 ms 0.221 ms 3189 -OpenCV_TopHat2D_Constant_Padding/1 0.258 ms 0.258 ms 2722 -OpenCV_BottomHat2D_Constant_Padding/1 0.262 ms 0.262 ms 2649 -OpenCV_MorphGrad2D_Constant_Padding/1 0.265 ms 0.265 ms 2636 
-OpenCV_Dilate2D_Constant_Padding/1 0.144 ms 0.144 ms 4833 -Saved PNG file. -Saved PNG file. -Saved PNG file. -Saved PNG file. -Saved PNG file. -Saved PNG file. -Saved PNG file. -Saved PNG file. -Saved PNG file. -Saved PNG file. -Saved PNG file. -Saved PNG file. -Saved PNG file. -Saved PNG file. -Saved PNG file. -Saved PNG file. -Saved PNG file. -Saved PNG file. -Saved PNG file. -Saved PNG file. -Saved PNG file. +Eigen_Convolve2D/1 4.99 ms 4.99 ms 140 +MLIR_Conv2D/1 7.56 ms 7.56 ms 92 +Buddy_Conv2D/1 0.704 ms 0.704 ms 938 +Buddy_Corr2D_Constant_Padding/1 1.09 ms 1.09 ms 634 +OpenCV_Filter2D_Constant_Padding/1 1.94 ms 1.94 ms 360 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.148 ms 0.148 ms 4731 +Buddy_Resize2D_Bilinear_Interpolation/1 0.275 ms 0.275 ms 2575 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 100987 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47878 +Buddy_Erosion2D_Constant_Padding/1 0.285 ms 0.285 ms 2361 +Buddy_Dilation2D_Constant_Padding/1 0.307 ms 0.307 ms 2362 +Buddy_Opening2D_Constant_Padding/1 0.620 ms 0.620 ms 1000 +Buddy_Closing2D_Constant_Padding/1 0.389 ms 0.389 ms 1712 +Buddy_TopHat2D_Constant_Padding/1 0.954 ms 0.954 ms 698 +Buddy_BottomHat2D_Constant_Padding/1 0.956 ms 0.956 ms 712 +OpenCV_Erode2D_Constant_Padding/1 0.141 ms 0.141 ms 4941 +OpenCV_Opening2D_Constant_Padding/1 0.226 ms 0.226 ms 3094 +OpenCV_Closing2D_Constant_Padding/1 0.220 ms 0.220 ms 3186 +OpenCV_TopHat2D_Constant_Padding/1 0.263 ms 0.263 ms 2722 +OpenCV_BottomHat2D_Constant_Padding/1 0.265 ms 0.265 ms 2637 diff --git a/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json index b8e4eae6..73c6c792 100644 --- a/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json +++ 
b/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-09-07T13:02:57+00:00", + "date": "2025-09-07T14:19:23+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [5.82666,6.12891,7.06201], + "load_avg": [4.16504,4.64014,5.37695], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 136, - "real_time": 5.1284465007483959e+00, - "cpu_time": 5.1283712794117653e+00, + "iterations": 139, + "real_time": 5.0173752385077712e+00, + "cpu_time": 5.0171431366906489e+00, "time_unit": "ms" }, { @@ -60,8 +60,8 @@ "repetition_index": 0, "threads": 1, "iterations": 93, - "real_time": 7.5461328510315191e+00, - "cpu_time": 7.5460632903225813e+00, + "real_time": 7.5428272367164650e+00, + "cpu_time": 7.5426328494623673e+00, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1000, - "real_time": 5.0872633233666420e-01, - "cpu_time": 5.0871887600000010e-01, + "iterations": 1245, + "real_time": 5.5684779123608841e-01, + "cpu_time": 5.5682440080321294e-01, "time_unit": "ms" }, { @@ -87,9 +87,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 841, - "real_time": 8.3388392586486948e-01, - "cpu_time": 8.3387196195005986e-01, + "iterations": 828, + "real_time": 8.4202265555875888e-01, + "cpu_time": 8.4200422584541035e-01, "time_unit": "ms" }, { @@ -101,9 +101,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 536, - "real_time": 1.3044935150711394e+00, - "cpu_time": 1.3044258451492536e+00, + "iterations": 531, + "real_time": 1.3090186522617628e+00, + "cpu_time": 1.3089821713747642e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 
4609, - "real_time": 1.5180687935136616e-01, - "cpu_time": 1.5180155087871558e-01, + "iterations": 4709, + "real_time": 1.4896581911007850e-01, + "cpu_time": 1.4896079018899980e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2575, - "real_time": 2.7318556187222304e-01, - "cpu_time": 2.7316950174757265e-01, + "iterations": 2577, + "real_time": 2.7260999168172845e-01, + "cpu_time": 2.7260413853317822e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 101061, - "real_time": 6.8790974146093432e-03, - "cpu_time": 6.8788971017504311e-03, + "iterations": 100412, + "real_time": 6.9631285576930929e-03, + "cpu_time": 6.9626375632394480e-03, "time_unit": "ms" }, { @@ -157,9 +157,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 48323, - "real_time": 1.4452015748223412e-02, - "cpu_time": 1.4451731452931325e-02, + "iterations": 47961, + "real_time": 1.4606321100466758e-02, + "cpu_time": 1.4605708992723267e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2784, - "real_time": 2.3488737723051473e-01, - "cpu_time": 2.3488115193965531e-01, + "iterations": 2105, + "real_time": 3.0771779369854868e-01, + "cpu_time": 3.0768270926365782e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2926, - "real_time": 2.3911561834755127e-01, - "cpu_time": 2.3910948803827733e-01, + "iterations": 2485, + "real_time": 3.0552347601059698e-01, + "cpu_time": 3.0551694647887367e-01, "time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1452, - "real_time": 4.3227852991789828e-01, - "cpu_time": 4.3227057300275434e-01, + "iterations": 1103, + "real_time": 5.5804573411522185e-01, + "cpu_time": 5.5798702266545741e-01, "time_unit": "ms" }, { @@ -213,9 
+213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1785, - "real_time": 3.9728948799501945e-01, - "cpu_time": 3.9727350868347305e-01, + "iterations": 1192, + "real_time": 5.6027826984356710e-01, + "cpu_time": 5.6026445553691262e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 643, - "real_time": 1.0138588029062767e+00, - "cpu_time": 1.0138225256609652e+00, + "iterations": 698, + "real_time": 9.4223976199299009e-01, + "cpu_time": 9.4221851719197713e-01, "time_unit": "ms" }, { @@ -241,9 +241,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 661, - "real_time": 1.0071998960852804e+00, - "cpu_time": 1.0071388003025714e+00, + "iterations": 710, + "real_time": 9.8398026656097093e-01, + "cpu_time": 9.8392653380281692e-01, "time_unit": "ms" }, { @@ -255,9 +255,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4784, - "real_time": 1.4612457191689956e-01, - "cpu_time": 1.4612131396321087e-01, + "iterations": 4902, + "real_time": 1.4306782463262246e-01, + "cpu_time": 1.4306102203182383e-01, "time_unit": "ms" }, { @@ -269,9 +269,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3134, - "real_time": 2.2347150792456313e-01, - "cpu_time": 2.2346277632418668e-01, + "iterations": 3075, + "real_time": 2.2767367886333931e-01, + "cpu_time": 2.2766505788617861e-01, "time_unit": "ms" }, { @@ -283,9 +283,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3168, - "real_time": 2.2385514816363353e-01, - "cpu_time": 2.2384855082070679e-01, + "iterations": 3093, + "real_time": 2.2700202289692917e-01, + "cpu_time": 2.2700032945360529e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2747, - "real_time": 2.5661755387115620e-01, - "cpu_time": 2.5660987295231175e-01, + "iterations": 2641, + "real_time": 2.6595405740568195e-01, + 
"cpu_time": 2.6595034721696315e-01, "time_unit": "ms" }, { @@ -311,9 +311,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2718, - "real_time": 2.5766646329140819e-01, - "cpu_time": 2.5765858977189021e-01, + "iterations": 2644, + "real_time": 2.6620356380127924e-01, + "cpu_time": 2.6620060287443309e-01, "time_unit": "ms" }, { @@ -325,9 +325,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2631, - "real_time": 2.6644636065911903e-01, - "cpu_time": 2.6643485404789002e-01, + "iterations": 2674, + "real_time": 2.6205028271443287e-01, + "cpu_time": 2.6204837621540705e-01, "time_unit": "ms" }, { @@ -339,9 +339,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4760, - "real_time": 1.4655530656210514e-01, - "cpu_time": 1.4655188613445386e-01, + "iterations": 4913, + "real_time": 1.4280434565172404e-01, + "cpu_time": 1.4280282454712012e-01, "time_unit": "ms" } ] diff --git a/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log b/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log index c183c831..a978a72e 100644 --- a/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log +++ b/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log @@ -1,4 +1,4 @@ -2025-09-07T13:02:57+00:00 +2025-09-07T14:19:23+00:00 Running ./bin/image-processing-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,33 +6,33 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 5.83, 6.13, 7.06 +Load Average: 4.17, 4.64, 5.38 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
-------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------------------- -Eigen_Convolve2D/1 5.13 ms 5.13 ms 136 -MLIR_Conv2D/1 7.55 ms 7.55 ms 93 -Buddy_Conv2D/1 0.509 ms 0.509 ms 1000 -Buddy_Corr2D_Constant_Padding/1 0.834 ms 0.834 ms 841 -OpenCV_Filter2D_Constant_Padding/1 1.30 ms 1.30 ms 536 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.152 ms 0.152 ms 4609 -Buddy_Resize2D_Bilinear_Interpolation/1 0.273 ms 0.273 ms 2575 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 101061 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 48323 -Buddy_Erosion2D_Constant_Padding/1 0.235 ms 0.235 ms 2784 -Buddy_Dilation2D_Constant_Padding/1 0.239 ms 0.239 ms 2926 -Buddy_Opening2D_Constant_Padding/1 0.432 ms 0.432 ms 1452 -Buddy_Closing2D_Constant_Padding/1 0.397 ms 0.397 ms 1785 -Buddy_TopHat2D_Constant_Padding/1 1.01 ms 1.01 ms 643 -Buddy_BottomHat2D_Constant_Padding/1 1.01 ms 1.01 ms 661 -OpenCV_Erode2D_Constant_Padding/1 0.146 ms 0.146 ms 4784 -OpenCV_Opening2D_Constant_Padding/1 0.223 ms 0.223 ms 3134 -OpenCV_Closing2D_Constant_Padding/1 0.224 ms 0.224 ms 3168 -OpenCV_TopHat2D_Constant_Padding/1 0.257 ms 0.257 ms 2747 -OpenCV_BottomHat2D_Constant_Padding/1 0.258 ms 0.258 ms 2718 -OpenCV_MorphGrad2D_Constant_Padding/1 0.266 ms 0.266 ms 2631 -OpenCV_Dilate2D_Constant_Padding/1 0.147 ms 0.147 ms 4760 +Eigen_Convolve2D/1 5.02 ms 5.02 ms 139 +MLIR_Conv2D/1 7.54 ms 7.54 ms 93 +Buddy_Conv2D/1 0.557 ms 0.557 ms 1245 +Buddy_Corr2D_Constant_Padding/1 0.842 ms 0.842 ms 828 +OpenCV_Filter2D_Constant_Padding/1 1.31 ms 1.31 ms 531 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.149 ms 0.149 ms 4709 +Buddy_Resize2D_Bilinear_Interpolation/1 0.273 ms 0.273 ms 2577 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 100412 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47961 
+Buddy_Erosion2D_Constant_Padding/1 0.308 ms 0.308 ms 2105 +Buddy_Dilation2D_Constant_Padding/1 0.306 ms 0.306 ms 2485 +Buddy_Opening2D_Constant_Padding/1 0.558 ms 0.558 ms 1103 +Buddy_Closing2D_Constant_Padding/1 0.560 ms 0.560 ms 1192 +Buddy_TopHat2D_Constant_Padding/1 0.942 ms 0.942 ms 698 +Buddy_BottomHat2D_Constant_Padding/1 0.984 ms 0.984 ms 710 +OpenCV_Erode2D_Constant_Padding/1 0.143 ms 0.143 ms 4902 +OpenCV_Opening2D_Constant_Padding/1 0.228 ms 0.228 ms 3075 +OpenCV_Closing2D_Constant_Padding/1 0.227 ms 0.227 ms 3093 +OpenCV_TopHat2D_Constant_Padding/1 0.266 ms 0.266 ms 2641 +OpenCV_BottomHat2D_Constant_Padding/1 0.266 ms 0.266 ms 2644 +OpenCV_MorphGrad2D_Constant_Padding/1 0.262 ms 0.262 ms 2674 +OpenCV_Dilate2D_Constant_Padding/1 0.143 ms 0.143 ms 4913 Saved PNG file. Saved PNG file. Saved PNG file. diff --git a/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json index f3967767..a7d1c605 100644 --- a/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json +++ b/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-09-07T13:03:20+00:00", + "date": "2025-09-07T14:19:46+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [4.87891,5.87744,6.95312], + "load_avg": [3.8335,4.5332,5.3252], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 140, - "real_time": 5.0184934533068111e+00, - "cpu_time": 5.0181792214285723e+00, + "iterations": 138, + "real_time": 5.0004229111515954e+00, + "cpu_time": 5.0002145289855076e+00, "time_unit": "ms" }, { @@ -60,8 +60,8 @@ "repetition_index": 
0, "threads": 1, "iterations": 92, - "real_time": 7.6453953819430396e+00, - "cpu_time": 7.6449425760869589e+00, + "real_time": 7.6372407214797065e+00, + "cpu_time": 7.6370552065217412e+00, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1324, - "real_time": 5.3367181329388635e-01, - "cpu_time": 5.3365209365558919e-01, + "iterations": 1355, + "real_time": 5.2936936821444891e-01, + "cpu_time": 5.2935688634686340e-01, "time_unit": "ms" }, { @@ -87,9 +87,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 790, - "real_time": 8.6351649576350098e-01, - "cpu_time": 8.6345711772151901e-01, + "iterations": 842, + "real_time": 8.3820865159929503e-01, + "cpu_time": 8.3819253444180519e-01, "time_unit": "ms" }, { @@ -101,9 +101,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 521, - "real_time": 1.3381383969893612e+00, - "cpu_time": 1.3381011746641083e+00, + "iterations": 532, + "real_time": 1.3121523915376878e+00, + "cpu_time": 1.3121226522556384e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4506, - "real_time": 1.5247919416088979e-01, - "cpu_time": 1.5247161540168652e-01, + "iterations": 4690, + "real_time": 1.4891786330036008e-01, + "cpu_time": 1.4891265415778249e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2507, - "real_time": 2.7503761510046343e-01, - "cpu_time": 2.7502824491423988e-01, + "iterations": 2560, + "real_time": 2.7395108627388254e-01, + "cpu_time": 2.7393973632812496e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 101549, - "real_time": 6.8775821290780010e-03, - "cpu_time": 6.8772612039508015e-03, + "iterations": 100948, + "real_time": 6.9163674436630357e-03, + "cpu_time": 6.9160762570828491e-03, "time_unit": "ms" }, { @@ -157,9 
+157,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 47841, - "real_time": 1.4524636204003844e-02, - "cpu_time": 1.4524494575782268e-02, + "iterations": 47793, + "real_time": 1.4607901661653386e-02, + "cpu_time": 1.4607340426422288e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2772, - "real_time": 2.6418570520471873e-01, - "cpu_time": 2.6416973701298724e-01, + "iterations": 2111, + "real_time": 2.9704664370167710e-01, + "cpu_time": 2.9704162955945018e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2644, - "real_time": 2.6406940501098736e-01, - "cpu_time": 2.6405351172465935e-01, + "iterations": 2524, + "real_time": 2.9669307827618913e-01, + "cpu_time": 2.9669044453248805e-01, "time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1476, - "real_time": 4.5833709904656500e-01, - "cpu_time": 4.5832794173441710e-01, + "iterations": 1162, + "real_time": 5.5793046040809913e-01, + "cpu_time": 5.5792592254733153e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1514, - "real_time": 4.6325272719327809e-01, - "cpu_time": 4.6324274768824364e-01, + "iterations": 1000, + "real_time": 5.7320801541209221e-01, + "cpu_time": 5.7317745799999997e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 614, - "real_time": 1.1151395738124847e+00, - "cpu_time": 1.1151286921824080e+00, + "iterations": 726, + "real_time": 9.6233903376524110e-01, + "cpu_time": 9.6231244352617151e-01, "time_unit": "ms" }, { @@ -241,9 +241,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 600, - "real_time": 1.1484713479876518e+00, - "cpu_time": 1.1484272233333332e+00, + "iterations": 698, + "real_time": 9.5446288244088950e-01, 
+ "cpu_time": 9.5444632521490114e-01, "time_unit": "ms" }, { @@ -255,9 +255,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4834, - "real_time": 1.4438382267064184e-01, - "cpu_time": 1.4438079209764174e-01, + "iterations": 4912, + "real_time": 1.4231482092958514e-01, + "cpu_time": 1.4231082552931573e-01, "time_unit": "ms" }, { @@ -269,9 +269,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3113, - "real_time": 2.2669639252904064e-01, - "cpu_time": 2.2668890459363947e-01, + "iterations": 3094, + "real_time": 2.2435946595923084e-01, + "cpu_time": 2.2434201357466077e-01, "time_unit": "ms" }, { @@ -283,9 +283,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2911, - "real_time": 2.4054079162669076e-01, - "cpu_time": 2.4053840467193477e-01, + "iterations": 3059, + "real_time": 2.2924085333090899e-01, + "cpu_time": 2.2922583458646567e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2646, - "real_time": 2.6420583400821829e-01, - "cpu_time": 2.6419529705215339e-01, + "iterations": 2607, + "real_time": 2.6636522107532212e-01, + "cpu_time": 2.6635171461449875e-01, "time_unit": "ms" }, { @@ -311,9 +311,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2629, - "real_time": 2.7253198341391033e-01, - "cpu_time": 2.7252830962343044e-01, + "iterations": 2651, + "real_time": 2.6092289197629281e-01, + "cpu_time": 2.6090935382874364e-01, "time_unit": "ms" }, { @@ -325,9 +325,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2630, - "real_time": 2.6534779613462239e-01, - "cpu_time": 2.6533501787072172e-01, + "iterations": 2641, + "real_time": 2.6426135376518939e-01, + "cpu_time": 2.6424787012495315e-01, "time_unit": "ms" }, { @@ -339,9 +339,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4812, - "real_time": 1.4677566591517091e-01, - "cpu_time": 
1.4676936450540284e-01, + "iterations": 4878, + "real_time": 1.4334160020455028e-01, + "cpu_time": 1.4333910229602290e-01, "time_unit": "ms" } ] diff --git a/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log b/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log index 62ca55c3..5f275baa 100644 --- a/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log +++ b/test_result/imageprocessing/SSE_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log @@ -1,4 +1,4 @@ -2025-09-07T13:03:20+00:00 +2025-09-07T14:19:46+00:00 Running ./bin/image-processing-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,33 +6,33 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 4.88, 5.88, 6.95 +Load Average: 3.83, 4.53, 5.33 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
-------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------------------- -Eigen_Convolve2D/1 5.02 ms 5.02 ms 140 -MLIR_Conv2D/1 7.65 ms 7.64 ms 92 -Buddy_Conv2D/1 0.534 ms 0.534 ms 1324 -Buddy_Corr2D_Constant_Padding/1 0.864 ms 0.863 ms 790 -OpenCV_Filter2D_Constant_Padding/1 1.34 ms 1.34 ms 521 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.152 ms 0.152 ms 4506 -Buddy_Resize2D_Bilinear_Interpolation/1 0.275 ms 0.275 ms 2507 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 101549 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47841 -Buddy_Erosion2D_Constant_Padding/1 0.264 ms 0.264 ms 2772 -Buddy_Dilation2D_Constant_Padding/1 0.264 ms 0.264 ms 2644 -Buddy_Opening2D_Constant_Padding/1 0.458 ms 0.458 ms 1476 -Buddy_Closing2D_Constant_Padding/1 0.463 ms 0.463 ms 1514 -Buddy_TopHat2D_Constant_Padding/1 1.12 ms 1.12 ms 614 -Buddy_BottomHat2D_Constant_Padding/1 1.15 ms 1.15 ms 600 -OpenCV_Erode2D_Constant_Padding/1 0.144 ms 0.144 ms 4834 -OpenCV_Opening2D_Constant_Padding/1 0.227 ms 0.227 ms 3113 -OpenCV_Closing2D_Constant_Padding/1 0.241 ms 0.241 ms 2911 -OpenCV_TopHat2D_Constant_Padding/1 0.264 ms 0.264 ms 2646 -OpenCV_BottomHat2D_Constant_Padding/1 0.273 ms 0.273 ms 2629 -OpenCV_MorphGrad2D_Constant_Padding/1 0.265 ms 0.265 ms 2630 -OpenCV_Dilate2D_Constant_Padding/1 0.147 ms 0.147 ms 4812 +Eigen_Convolve2D/1 5.00 ms 5.00 ms 138 +MLIR_Conv2D/1 7.64 ms 7.64 ms 92 +Buddy_Conv2D/1 0.529 ms 0.529 ms 1355 +Buddy_Corr2D_Constant_Padding/1 0.838 ms 0.838 ms 842 +OpenCV_Filter2D_Constant_Padding/1 1.31 ms 1.31 ms 532 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.149 ms 0.149 ms 4690 +Buddy_Resize2D_Bilinear_Interpolation/1 0.274 ms 0.274 ms 2560 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 100948 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47793 
+Buddy_Erosion2D_Constant_Padding/1 0.297 ms 0.297 ms 2111 +Buddy_Dilation2D_Constant_Padding/1 0.297 ms 0.297 ms 2524 +Buddy_Opening2D_Constant_Padding/1 0.558 ms 0.558 ms 1162 +Buddy_Closing2D_Constant_Padding/1 0.573 ms 0.573 ms 1000 +Buddy_TopHat2D_Constant_Padding/1 0.962 ms 0.962 ms 726 +Buddy_BottomHat2D_Constant_Padding/1 0.954 ms 0.954 ms 698 +OpenCV_Erode2D_Constant_Padding/1 0.142 ms 0.142 ms 4912 +OpenCV_Opening2D_Constant_Padding/1 0.224 ms 0.224 ms 3094 +OpenCV_Closing2D_Constant_Padding/1 0.229 ms 0.229 ms 3059 +OpenCV_TopHat2D_Constant_Padding/1 0.266 ms 0.266 ms 2607 +OpenCV_BottomHat2D_Constant_Padding/1 0.261 ms 0.261 ms 2651 +OpenCV_MorphGrad2D_Constant_Padding/1 0.264 ms 0.264 ms 2641 +OpenCV_Dilate2D_Constant_Padding/1 0.143 ms 0.143 ms 4878 Saved PNG file. Saved PNG file. Saved PNG file. diff --git a/test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json index c572ef69..0bdbe2b4 100644 --- a/test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json +++ b/test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-09-07T13:03:44+00:00", + "date": "2025-09-07T14:20:10+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [4.40625,5.69189,6.8623], + "load_avg": [3.54834,4.40869,5.26172], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 136, - "real_time": 5.1616181290763263e+00, - "cpu_time": 5.1615701838235291e+00, + "iterations": 135, + "real_time": 5.1553568078411951e+00, + "cpu_time": 5.1551722074074089e+00, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, 
"repetition_index": 0, "threads": 1, - "iterations": 91, - "real_time": 7.6719810674478719e+00, - "cpu_time": 7.6718306263736249e+00, + "iterations": 92, + "real_time": 7.5749951293287072e+00, + "cpu_time": 7.5747488586956546e+00, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1318, - "real_time": 5.1680353381112787e-01, - "cpu_time": 5.1676989984825494e-01, + "iterations": 1284, + "real_time": 5.3980214248565128e-01, + "cpu_time": 5.3977712928348909e-01, "time_unit": "ms" }, { @@ -87,9 +87,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 810, - "real_time": 8.7384458769250795e-01, - "cpu_time": 8.7382117160493800e-01, + "iterations": 832, + "real_time": 8.4146023996604180e-01, + "cpu_time": 8.4142216826923055e-01, "time_unit": "ms" }, { @@ -101,9 +101,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 527, - "real_time": 1.3300638073309550e+00, - "cpu_time": 1.3300308349146115e+00, + "iterations": 533, + "real_time": 1.3132831169114103e+00, + "cpu_time": 1.3132458273921197e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4520, - "real_time": 1.5405326817942933e-01, - "cpu_time": 1.5404699181415932e-01, + "iterations": 4700, + "real_time": 1.4895170213694267e-01, + "cpu_time": 1.4894422382978711e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2534, - "real_time": 2.7796252385875608e-01, - "cpu_time": 2.7795281649565928e-01, + "iterations": 2571, + "real_time": 2.7564828269957381e-01, + "cpu_time": 2.7563723648385829e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 100621, - "real_time": 6.9713447932306287e-03, - "cpu_time": 6.9711385197920878e-03, + "iterations": 101036, + "real_time": 6.9258848736223769e-03, + "cpu_time": 
6.9255131933172341e-03, "time_unit": "ms" }, { @@ -157,9 +157,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 47677, - "real_time": 1.4668967632108515e-02, - "cpu_time": 1.4668751662227062e-02, + "iterations": 47859, + "real_time": 1.4628459030991201e-02, + "cpu_time": 1.4628155059654391e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2528, - "real_time": 2.6593122430900229e-01, - "cpu_time": 2.6592099723101265e-01, + "iterations": 2611, + "real_time": 2.7920378351385045e-01, + "cpu_time": 2.7918063960168527e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2586, - "real_time": 2.6167884803242397e-01, - "cpu_time": 2.6166388399071899e-01, + "iterations": 2244, + "real_time": 2.7905655742594693e-01, + "cpu_time": 2.7904662745097980e-01, "time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1531, - "real_time": 4.6423418723548027e-01, - "cpu_time": 4.6422537557152155e-01, + "iterations": 1236, + "real_time": 5.2211519246348281e-01, + "cpu_time": 5.2209284142394796e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1535, - "real_time": 4.6207869625635178e-01, - "cpu_time": 4.6206046188925093e-01, + "iterations": 1165, + "real_time": 4.4711210760947462e-01, + "cpu_time": 4.4709575622317560e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 599, - "real_time": 1.1238463732953461e+00, - "cpu_time": 1.1238221803004993e+00, + "iterations": 715, + "real_time": 9.3931627127674078e-01, + "cpu_time": 9.3925903916084019e-01, "time_unit": "ms" }, { @@ -241,9 +241,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 647, - "real_time": 1.1313838029202574e+00, - "cpu_time": 1.1313273972179276e+00, + 
"iterations": 723, + "real_time": 9.5567062888732424e-01, + "cpu_time": 9.5561843983402406e-01, "time_unit": "ms" }, { @@ -255,9 +255,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4801, - "real_time": 1.4477257687234552e-01, - "cpu_time": 1.4477115496771487e-01, + "iterations": 4917, + "real_time": 1.4225059871341922e-01, + "cpu_time": 1.4224726479560690e-01, "time_unit": "ms" }, { @@ -269,9 +269,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3080, - "real_time": 2.2534245455806906e-01, - "cpu_time": 2.2533414415584410e-01, + "iterations": 2994, + "real_time": 2.3417615903443151e-01, + "cpu_time": 2.3416278323313275e-01, "time_unit": "ms" }, { @@ -283,9 +283,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3006, - "real_time": 2.3400538780057897e-01, - "cpu_time": 2.3400324983366630e-01, + "iterations": 2975, + "real_time": 2.3566543179399826e-01, + "cpu_time": 2.3564990386554591e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2627, - "real_time": 2.6628348109928157e-01, - "cpu_time": 2.6627249143509668e-01, + "iterations": 2582, + "real_time": 2.6798345968069537e-01, + "cpu_time": 2.6797476646010809e-01, "time_unit": "ms" }, { @@ -311,9 +311,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2595, - "real_time": 2.7239328261055701e-01, - "cpu_time": 2.7238921310211944e-01, + "iterations": 2607, + "real_time": 2.7319310375178141e-01, + "cpu_time": 2.7318398925968451e-01, "time_unit": "ms" }, { @@ -325,9 +325,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2627, - "real_time": 2.6650840808649140e-01, - "cpu_time": 2.6649613247049775e-01, + "iterations": 2649, + "real_time": 2.6686968568047292e-01, + "cpu_time": 2.6686162136655345e-01, "time_unit": "ms" }, { @@ -339,9 +339,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4881, - 
"real_time": 1.4324055111107828e-01, - "cpu_time": 1.4323654189715235e-01, + "iterations": 4944, + "real_time": 1.4209808046329755e-01, + "cpu_time": 1.4209095307443359e-01, "time_unit": "ms" } ] diff --git a/test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log b/test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log index b21eda9f..29391a63 100644 --- a/test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log +++ b/test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log @@ -1,4 +1,4 @@ -2025-09-07T13:03:44+00:00 +2025-09-07T14:20:10+00:00 Running ./bin/image-processing-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,33 +6,33 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 4.41, 5.69, 6.86 +Load Average: 3.55, 4.41, 5.26 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
-------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------------------- -Eigen_Convolve2D/1 5.16 ms 5.16 ms 136 -MLIR_Conv2D/1 7.67 ms 7.67 ms 91 -Buddy_Conv2D/1 0.517 ms 0.517 ms 1318 -Buddy_Corr2D_Constant_Padding/1 0.874 ms 0.874 ms 810 -OpenCV_Filter2D_Constant_Padding/1 1.33 ms 1.33 ms 527 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.154 ms 0.154 ms 4520 -Buddy_Resize2D_Bilinear_Interpolation/1 0.278 ms 0.278 ms 2534 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 100621 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47677 -Buddy_Erosion2D_Constant_Padding/1 0.266 ms 0.266 ms 2528 -Buddy_Dilation2D_Constant_Padding/1 0.262 ms 0.262 ms 2586 -Buddy_Opening2D_Constant_Padding/1 0.464 ms 0.464 ms 1531 -Buddy_Closing2D_Constant_Padding/1 0.462 ms 0.462 ms 1535 -Buddy_TopHat2D_Constant_Padding/1 1.12 ms 1.12 ms 599 -Buddy_BottomHat2D_Constant_Padding/1 1.13 ms 1.13 ms 647 -OpenCV_Erode2D_Constant_Padding/1 0.145 ms 0.145 ms 4801 -OpenCV_Opening2D_Constant_Padding/1 0.225 ms 0.225 ms 3080 -OpenCV_Closing2D_Constant_Padding/1 0.234 ms 0.234 ms 3006 -OpenCV_TopHat2D_Constant_Padding/1 0.266 ms 0.266 ms 2627 -OpenCV_BottomHat2D_Constant_Padding/1 0.272 ms 0.272 ms 2595 -OpenCV_MorphGrad2D_Constant_Padding/1 0.267 ms 0.266 ms 2627 -OpenCV_Dilate2D_Constant_Padding/1 0.143 ms 0.143 ms 4881 +Eigen_Convolve2D/1 5.16 ms 5.16 ms 135 +MLIR_Conv2D/1 7.57 ms 7.57 ms 92 +Buddy_Conv2D/1 0.540 ms 0.540 ms 1284 +Buddy_Corr2D_Constant_Padding/1 0.841 ms 0.841 ms 832 +OpenCV_Filter2D_Constant_Padding/1 1.31 ms 1.31 ms 533 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.149 ms 0.149 ms 4700 +Buddy_Resize2D_Bilinear_Interpolation/1 0.276 ms 0.276 ms 2571 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 101036 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47859 
+Buddy_Erosion2D_Constant_Padding/1 0.279 ms 0.279 ms 2611 +Buddy_Dilation2D_Constant_Padding/1 0.279 ms 0.279 ms 2244 +Buddy_Opening2D_Constant_Padding/1 0.522 ms 0.522 ms 1236 +Buddy_Closing2D_Constant_Padding/1 0.447 ms 0.447 ms 1165 +Buddy_TopHat2D_Constant_Padding/1 0.939 ms 0.939 ms 715 +Buddy_BottomHat2D_Constant_Padding/1 0.956 ms 0.956 ms 723 +OpenCV_Erode2D_Constant_Padding/1 0.142 ms 0.142 ms 4917 +OpenCV_Opening2D_Constant_Padding/1 0.234 ms 0.234 ms 2994 +OpenCV_Closing2D_Constant_Padding/1 0.236 ms 0.236 ms 2975 +OpenCV_TopHat2D_Constant_Padding/1 0.268 ms 0.268 ms 2582 +OpenCV_BottomHat2D_Constant_Padding/1 0.273 ms 0.273 ms 2607 +OpenCV_MorphGrad2D_Constant_Padding/1 0.267 ms 0.267 ms 2649 +OpenCV_Dilate2D_Constant_Padding/1 0.142 ms 0.142 ms 4944 Saved PNG file. Saved PNG file. Saved PNG file. diff --git a/test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json index 8d237431..f2f95365 100644 --- a/test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json +++ b/test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-09-07T13:04:08+00:00", + "date": "2025-09-07T14:20:33+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [4.30176,5.58203,6.80029], + "load_avg": [3.47266,4.33301,5.21826], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 138, - "real_time": 5.1051492978265323e+00, - "cpu_time": 5.1049708043478264e+00, + "iterations": 139, + "real_time": 5.0128592754439483e+00, + "cpu_time": 5.0124305971223029e+00, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 
1, "repetition_index": 0, "threads": 1, - "iterations": 91, - "real_time": 7.6753021023430668e+00, - "cpu_time": 7.6749714395604389e+00, + "iterations": 92, + "real_time": 7.6231503373254901e+00, + "cpu_time": 7.6229129565217395e+00, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1299, - "real_time": 5.4009209707482586e-01, - "cpu_time": 5.4006453117782904e-01, + "iterations": 1298, + "real_time": 5.2513539481144655e-01, + "cpu_time": 5.2512819799691823e-01, "time_unit": "ms" }, { @@ -87,9 +87,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 808, - "real_time": 8.6884597076637915e-01, - "cpu_time": 8.6882243193069264e-01, + "iterations": 840, + "real_time": 8.3748531926955494e-01, + "cpu_time": 8.3737668214285732e-01, "time_unit": "ms" }, { @@ -101,9 +101,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 521, - "real_time": 1.3466300567944540e+00, - "cpu_time": 1.3465901938579645e+00, + "iterations": 532, + "real_time": 1.3121627901393669e+00, + "cpu_time": 1.3119710808270681e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4586, - "real_time": 1.5055361811869331e-01, - "cpu_time": 1.5055115263846494e-01, + "iterations": 4720, + "real_time": 1.4909265436610933e-01, + "cpu_time": 1.4907397690677973e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2536, - "real_time": 2.7593569418750352e-01, - "cpu_time": 2.7592326301261838e-01, + "iterations": 2565, + "real_time": 2.7453704332282902e-01, + "cpu_time": 2.7450112709551661e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 100865, - "real_time": 6.9434849884870160e-03, - "cpu_time": 6.9433250483319220e-03, + "iterations": 100545, + "real_time": 6.9568862453947087e-03, + "cpu_time": 
6.9558761649012911e-03, "time_unit": "ms" }, { @@ -157,9 +157,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 47835, - "real_time": 1.4630887664023569e-02, - "cpu_time": 1.4630675154175796e-02, + "iterations": 47892, + "real_time": 1.4604156723145325e-02, + "cpu_time": 1.4601928923410997e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2523, - "real_time": 2.7164263452181397e-01, - "cpu_time": 2.7162846452635753e-01, + "iterations": 2439, + "real_time": 2.9473153659680573e-01, + "cpu_time": 2.9469328085280849e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2535, - "real_time": 2.7691923243524524e-01, - "cpu_time": 2.7690100433925030e-01, + "iterations": 2168, + "real_time": 2.8830999164838633e-01, + "cpu_time": 2.8826715083025839e-01, "time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1341, - "real_time": 4.7276657433051122e-01, - "cpu_time": 4.7274863310961951e-01, + "iterations": 1168, + "real_time": 4.9684155566541299e-01, + "cpu_time": 4.9675358390410967e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1364, - "real_time": 4.9240803045611226e-01, - "cpu_time": 4.9239791862170046e-01, + "iterations": 1106, + "real_time": 4.7481091016024407e-01, + "cpu_time": 4.7480679113924112e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 579, - "real_time": 1.1095030101366816e+00, - "cpu_time": 1.1094485198618311e+00, + "iterations": 689, + "real_time": 9.6393543659170067e-01, + "cpu_time": 9.6392819158200227e-01, "time_unit": "ms" }, { @@ -241,9 +241,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 687, - "real_time": 1.1041596737924795e+00, - "cpu_time": 1.1041230829694306e+00, + 
"iterations": 707, + "real_time": 9.6352926364545266e-01, + "cpu_time": 9.6352213578500723e-01, "time_unit": "ms" }, { @@ -255,9 +255,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4899, - "real_time": 1.4196342961061584e-01, - "cpu_time": 1.4195862380077542e-01, + "iterations": 4934, + "real_time": 1.4176106599114616e-01, + "cpu_time": 1.4175841284961477e-01, "time_unit": "ms" }, { @@ -269,9 +269,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3199, - "real_time": 2.1802198937178477e-01, - "cpu_time": 2.1801989809315439e-01, + "iterations": 3072, + "real_time": 2.2831910731232105e-01, + "cpu_time": 2.2831633561197898e-01, "time_unit": "ms" }, { @@ -283,9 +283,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3190, - "real_time": 2.2261308091561249e-01, - "cpu_time": 2.2260142257053250e-01, + "iterations": 3104, + "real_time": 2.2609134987188675e-01, + "cpu_time": 2.2608958762886630e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2723, - "real_time": 2.6208069962129582e-01, - "cpu_time": 2.6207134373852359e-01, + "iterations": 2646, + "real_time": 2.6597667595291352e-01, + "cpu_time": 2.6597261753590368e-01, "time_unit": "ms" }, { @@ -311,9 +311,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2666, - "real_time": 2.6216383238126706e-01, - "cpu_time": 2.6214668942235603e-01, + "iterations": 2601, + "real_time": 2.6558996427063758e-01, + "cpu_time": 2.6558628604382972e-01, "time_unit": "ms" }, { @@ -325,9 +325,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2677, - "real_time": 2.6168456168196119e-01, - "cpu_time": 2.6168025476279372e-01, + "iterations": 2659, + "real_time": 2.6361091008487614e-01, + "cpu_time": 2.6360892515983414e-01, "time_unit": "ms" }, { @@ -339,9 +339,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4887, - 
"real_time": 1.4247795738811894e-01, - "cpu_time": 1.4247083793738480e-01, + "iterations": 4871, + "real_time": 1.4462083764718411e-01, + "cpu_time": 1.4461981831246187e-01, "time_unit": "ms" } ] diff --git a/test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log b/test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log index b36785c7..136e2000 100644 --- a/test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log +++ b/test_result/imageprocessing/SSE_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log @@ -1,4 +1,4 @@ -2025-09-07T13:04:08+00:00 +2025-09-07T14:20:33+00:00 Running ./bin/image-processing-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,33 +6,33 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 4.30, 5.58, 6.80 +Load Average: 3.47, 4.33, 5.22 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
-------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------------------- -Eigen_Convolve2D/1 5.11 ms 5.10 ms 138 -MLIR_Conv2D/1 7.68 ms 7.67 ms 91 -Buddy_Conv2D/1 0.540 ms 0.540 ms 1299 -Buddy_Corr2D_Constant_Padding/1 0.869 ms 0.869 ms 808 -OpenCV_Filter2D_Constant_Padding/1 1.35 ms 1.35 ms 521 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.151 ms 0.151 ms 4586 -Buddy_Resize2D_Bilinear_Interpolation/1 0.276 ms 0.276 ms 2536 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 100865 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47835 -Buddy_Erosion2D_Constant_Padding/1 0.272 ms 0.272 ms 2523 -Buddy_Dilation2D_Constant_Padding/1 0.277 ms 0.277 ms 2535 -Buddy_Opening2D_Constant_Padding/1 0.473 ms 0.473 ms 1341 -Buddy_Closing2D_Constant_Padding/1 0.492 ms 0.492 ms 1364 -Buddy_TopHat2D_Constant_Padding/1 1.11 ms 1.11 ms 579 -Buddy_BottomHat2D_Constant_Padding/1 1.10 ms 1.10 ms 687 -OpenCV_Erode2D_Constant_Padding/1 0.142 ms 0.142 ms 4899 -OpenCV_Opening2D_Constant_Padding/1 0.218 ms 0.218 ms 3199 -OpenCV_Closing2D_Constant_Padding/1 0.223 ms 0.223 ms 3190 -OpenCV_TopHat2D_Constant_Padding/1 0.262 ms 0.262 ms 2723 -OpenCV_BottomHat2D_Constant_Padding/1 0.262 ms 0.262 ms 2666 -OpenCV_MorphGrad2D_Constant_Padding/1 0.262 ms 0.262 ms 2677 -OpenCV_Dilate2D_Constant_Padding/1 0.142 ms 0.142 ms 4887 +Eigen_Convolve2D/1 5.01 ms 5.01 ms 139 +MLIR_Conv2D/1 7.62 ms 7.62 ms 92 +Buddy_Conv2D/1 0.525 ms 0.525 ms 1298 +Buddy_Corr2D_Constant_Padding/1 0.837 ms 0.837 ms 840 +OpenCV_Filter2D_Constant_Padding/1 1.31 ms 1.31 ms 532 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.149 ms 0.149 ms 4720 +Buddy_Resize2D_Bilinear_Interpolation/1 0.275 ms 0.275 ms 2565 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 100545 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47892 
+Buddy_Erosion2D_Constant_Padding/1 0.295 ms 0.295 ms 2439 +Buddy_Dilation2D_Constant_Padding/1 0.288 ms 0.288 ms 2168 +Buddy_Opening2D_Constant_Padding/1 0.497 ms 0.497 ms 1168 +Buddy_Closing2D_Constant_Padding/1 0.475 ms 0.475 ms 1106 +Buddy_TopHat2D_Constant_Padding/1 0.964 ms 0.964 ms 689 +Buddy_BottomHat2D_Constant_Padding/1 0.964 ms 0.964 ms 707 +OpenCV_Erode2D_Constant_Padding/1 0.142 ms 0.142 ms 4934 +OpenCV_Opening2D_Constant_Padding/1 0.228 ms 0.228 ms 3072 +OpenCV_Closing2D_Constant_Padding/1 0.226 ms 0.226 ms 3104 +OpenCV_TopHat2D_Constant_Padding/1 0.266 ms 0.266 ms 2646 +OpenCV_BottomHat2D_Constant_Padding/1 0.266 ms 0.266 ms 2601 +OpenCV_MorphGrad2D_Constant_Padding/1 0.264 ms 0.264 ms 2659 +OpenCV_Dilate2D_Constant_Padding/1 0.145 ms 0.145 ms 4871 Saved PNG file. Saved PNG file. Saved PNG file. diff --git a/test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json index 61b75d6a..2731abdb 100644 --- a/test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json +++ b/test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-09-07T13:04:32+00:00", + "date": "2025-09-07T14:20:55+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [3.85547,5.37354,6.69824], + "load_avg": [3.31006,4.22461,5.1582], "library_build_type": "release" }, "benchmarks": [ @@ -46,8 +46,8 @@ "repetition_index": 0, "threads": 1, "iterations": 57, - "real_time": 1.2151056606518594e+01, - "cpu_time": 1.2150699157894737e+01, + "real_time": 1.2186227660430106e+01, + "cpu_time": 1.2185617228070177e+01, "time_unit": "ms" }, { @@ -60,8 +60,8 @@ "repetition_index": 0, "threads": 1, "iterations": 23, - 
"real_time": 3.0677807233903717e+01, - "cpu_time": 3.0676721304347819e+01, + "real_time": 3.0569540579681810e+01, + "cpu_time": 3.0567936217391306e+01, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 209, - "real_time": 3.3309125836100875e+00, - "cpu_time": 3.3308292583732060e+00, + "iterations": 212, + "real_time": 3.2612147899168842e+00, + "cpu_time": 3.2610398018867928e+00, "time_unit": "ms" }, { @@ -88,8 +88,8 @@ "repetition_index": 0, "threads": 1, "iterations": 287, - "real_time": 2.4366429146989299e+00, - "cpu_time": 2.4365807421602783e+00, + "real_time": 2.4475226055454296e+00, + "cpu_time": 2.4473893937282227e+00, "time_unit": "ms" }, { @@ -101,9 +101,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 161, - "real_time": 4.3121754475261858e+00, - "cpu_time": 4.3120131366459598e+00, + "iterations": 162, + "real_time": 4.3507855339550678e+00, + "cpu_time": 4.3502307716049353e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4635, - "real_time": 1.4946488854440157e-01, - "cpu_time": 1.4945905307443369e-01, + "iterations": 4702, + "real_time": 1.4982172392977394e-01, + "cpu_time": 1.4980982752020414e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2563, - "real_time": 2.7466075690379610e-01, - "cpu_time": 2.7464417362465848e-01, + "iterations": 2551, + "real_time": 2.7482590690307734e-01, + "cpu_time": 2.7479107918463358e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 102703, - "real_time": 6.8518726575323881e-03, - "cpu_time": 6.8517319455127989e-03, + "iterations": 101196, + "real_time": 6.9294706260273542e-03, + "cpu_time": 6.9288548559231639e-03, "time_unit": "ms" }, { @@ -157,9 +157,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 
48620, - "real_time": 1.4454776789734071e-02, - "cpu_time": 1.4454200761003693e-02, + "iterations": 47024, + "real_time": 1.4854396933289805e-02, + "cpu_time": 1.4854171763354861e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2882, - "real_time": 2.4118318528164434e-01, - "cpu_time": 2.4117433102012481e-01, + "iterations": 2302, + "real_time": 3.2017572251327342e-01, + "cpu_time": 3.2016908123371007e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2898, - "real_time": 2.3979534124447774e-01, - "cpu_time": 2.3978525431331929e-01, + "iterations": 2501, + "real_time": 3.0623350749726963e-01, + "cpu_time": 3.0622376009596158e-01, "time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1888, - "real_time": 3.8621500647484752e-01, - "cpu_time": 3.8620675741525484e-01, + "iterations": 1120, + "real_time": 5.8248242296810659e-01, + "cpu_time": 5.8245296071428587e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1755, - "real_time": 4.4991118870569430e-01, - "cpu_time": 4.4989996011396011e-01, + "iterations": 1000, + "real_time": 5.7962449267506599e-01, + "cpu_time": 5.7961194700000007e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 660, - "real_time": 1.0221678989403176e+00, - "cpu_time": 1.0221497363636358e+00, + "iterations": 745, + "real_time": 9.4148450649824722e-01, + "cpu_time": 9.4141236644295290e-01, "time_unit": "ms" }, { @@ -241,9 +241,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 688, - "real_time": 1.0242091686746408e+00, - "cpu_time": 1.0241701438953492e+00, + "iterations": 714, + "real_time": 9.2218336792720135e-01, + "cpu_time": 9.2215487114845818e-01, "time_unit": "ms" }, { @@ -255,9 +255,9 @@ 
"repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4811, - "real_time": 1.4358576572231285e-01, - "cpu_time": 1.4358440968613598e-01, + "iterations": 4928, + "real_time": 1.4172616798745155e-01, + "cpu_time": 1.4172150892857185e-01, "time_unit": "ms" }, { @@ -269,9 +269,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3091, - "real_time": 2.2554094835378252e-01, - "cpu_time": 2.2553393950177869e-01, + "iterations": 3029, + "real_time": 2.3071269989604012e-01, + "cpu_time": 2.3070404060746205e-01, "time_unit": "ms" }, { @@ -283,9 +283,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3116, - "real_time": 2.2321108690390079e-01, - "cpu_time": 2.2319902118100216e-01, + "iterations": 3021, + "real_time": 2.3122206552263050e-01, + "cpu_time": 2.3121828334988390e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2685, - "real_time": 2.6148691551423381e-01, - "cpu_time": 2.6147274823091216e-01, + "iterations": 2590, + "real_time": 2.7057007674314804e-01, + "cpu_time": 2.7055772625482621e-01, "time_unit": "ms" }, { @@ -311,9 +311,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2680, - "real_time": 2.5973816527359522e-01, - "cpu_time": 2.5973222238806032e-01, + "iterations": 2602, + "real_time": 2.6801066311344013e-01, + "cpu_time": 2.6800748847040845e-01, "time_unit": "ms" }, { @@ -325,9 +325,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2641, - "real_time": 2.6617153057181803e-01, - "cpu_time": 2.6616062021961406e-01, + "iterations": 2621, + "real_time": 2.6615193267898313e-01, + "cpu_time": 2.6614443151468892e-01, "time_unit": "ms" }, { @@ -339,9 +339,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4895, - "real_time": 1.4290322641917219e-01, - "cpu_time": 1.4289656996935668e-01, + "iterations": 4881, + "real_time": 1.4305517117133568e-01, + 
"cpu_time": 1.4305138004507262e-01, "time_unit": "ms" } ] diff --git a/test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log b/test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log index 51fd917b..ff61fa5e 100644 --- a/test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log +++ b/test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log @@ -1,4 +1,4 @@ -2025-09-07T13:04:32+00:00 +2025-09-07T14:20:55+00:00 Running ./bin/image-processing-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,33 +6,33 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 3.86, 5.37, 6.70 +Load Average: 3.31, 4.22, 5.16 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. -------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------------------- Eigen_Convolve2D/1 12.2 ms 12.2 ms 57 -MLIR_Conv2D/1 30.7 ms 30.7 ms 23 -Buddy_Conv2D/1 3.33 ms 3.33 ms 209 -Buddy_Corr2D_Constant_Padding/1 2.44 ms 2.44 ms 287 -OpenCV_Filter2D_Constant_Padding/1 4.31 ms 4.31 ms 161 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.149 ms 0.149 ms 4635 -Buddy_Resize2D_Bilinear_Interpolation/1 0.275 ms 0.275 ms 2563 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 102703 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 48620 -Buddy_Erosion2D_Constant_Padding/1 0.241 ms 0.241 ms 2882 -Buddy_Dilation2D_Constant_Padding/1 0.240 ms 0.240 ms 2898 -Buddy_Opening2D_Constant_Padding/1 0.386 ms 0.386 ms 1888 -Buddy_Closing2D_Constant_Padding/1 0.450 ms 0.450 ms 1755 -Buddy_TopHat2D_Constant_Padding/1 1.02 ms 1.02 ms 660 
-Buddy_BottomHat2D_Constant_Padding/1 1.02 ms 1.02 ms 688 -OpenCV_Erode2D_Constant_Padding/1 0.144 ms 0.144 ms 4811 -OpenCV_Opening2D_Constant_Padding/1 0.226 ms 0.226 ms 3091 -OpenCV_Closing2D_Constant_Padding/1 0.223 ms 0.223 ms 3116 -OpenCV_TopHat2D_Constant_Padding/1 0.261 ms 0.261 ms 2685 -OpenCV_BottomHat2D_Constant_Padding/1 0.260 ms 0.260 ms 2680 -OpenCV_MorphGrad2D_Constant_Padding/1 0.266 ms 0.266 ms 2641 -OpenCV_Dilate2D_Constant_Padding/1 0.143 ms 0.143 ms 4895 +MLIR_Conv2D/1 30.6 ms 30.6 ms 23 +Buddy_Conv2D/1 3.26 ms 3.26 ms 212 +Buddy_Corr2D_Constant_Padding/1 2.45 ms 2.45 ms 287 +OpenCV_Filter2D_Constant_Padding/1 4.35 ms 4.35 ms 162 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.150 ms 0.150 ms 4702 +Buddy_Resize2D_Bilinear_Interpolation/1 0.275 ms 0.275 ms 2551 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 101196 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47024 +Buddy_Erosion2D_Constant_Padding/1 0.320 ms 0.320 ms 2302 +Buddy_Dilation2D_Constant_Padding/1 0.306 ms 0.306 ms 2501 +Buddy_Opening2D_Constant_Padding/1 0.582 ms 0.582 ms 1120 +Buddy_Closing2D_Constant_Padding/1 0.580 ms 0.580 ms 1000 +Buddy_TopHat2D_Constant_Padding/1 0.941 ms 0.941 ms 745 +Buddy_BottomHat2D_Constant_Padding/1 0.922 ms 0.922 ms 714 +OpenCV_Erode2D_Constant_Padding/1 0.142 ms 0.142 ms 4928 +OpenCV_Opening2D_Constant_Padding/1 0.231 ms 0.231 ms 3029 +OpenCV_Closing2D_Constant_Padding/1 0.231 ms 0.231 ms 3021 +OpenCV_TopHat2D_Constant_Padding/1 0.271 ms 0.271 ms 2590 +OpenCV_BottomHat2D_Constant_Padding/1 0.268 ms 0.268 ms 2602 +OpenCV_MorphGrad2D_Constant_Padding/1 0.266 ms 0.266 ms 2621 +OpenCV_Dilate2D_Constant_Padding/1 0.143 ms 0.143 ms 4881 Saved PNG file. Saved PNG file. Saved PNG file. 
diff --git a/test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json index 8bae8bbe..0c01eb95 100644 --- a/test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json +++ b/test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-09-07T13:04:57+00:00", + "date": "2025-09-07T14:21:20+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [3.30176,5.11816,6.57764], + "load_avg": [3.26123,4.14062,5.10498], "library_build_type": "release" }, "benchmarks": [ @@ -46,8 +46,8 @@ "repetition_index": 0, "threads": 1, "iterations": 58, - "real_time": 1.2099689322298971e+01, - "cpu_time": 1.2099262362068966e+01, + "real_time": 1.2124883910191469e+01, + "cpu_time": 1.2124201741379311e+01, "time_unit": "ms" }, { @@ -60,8 +60,8 @@ "repetition_index": 0, "threads": 1, "iterations": 23, - "real_time": 3.0390074881522551e+01, - "cpu_time": 3.0389264652173903e+01, + "real_time": 3.0509588349124659e+01, + "cpu_time": 3.0508405608695647e+01, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 213, - "real_time": 3.2895873410041343e+00, - "cpu_time": 3.2894827981220649e+00, + "iterations": 210, + "real_time": 3.3209008652539480e+00, + "cpu_time": 3.3204979476190473e+00, "time_unit": "ms" }, { @@ -87,9 +87,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 288, - "real_time": 2.4207590354813471e+00, - "cpu_time": 2.4206542881944442e+00, + "iterations": 284, + "real_time": 2.4634649268758131e+00, + "cpu_time": 2.4633182781690137e+00, "time_unit": "ms" }, { @@ -101,9 +101,9 @@ "repetitions": 1, "repetition_index": 0, 
"threads": 1, - "iterations": 162, - "real_time": 4.3342783817170583e+00, - "cpu_time": 4.3339787654320956e+00, + "iterations": 163, + "real_time": 4.3028585064264897e+00, + "cpu_time": 4.3025189877300605e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4631, - "real_time": 1.4959857160600776e-01, - "cpu_time": 1.4958810192183106e-01, + "iterations": 4709, + "real_time": 1.4914729984278990e-01, + "cpu_time": 1.4913447419834347e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2549, - "real_time": 2.7472342712685566e-01, - "cpu_time": 2.7470473715182425e-01, + "iterations": 2576, + "real_time": 2.7474065843437401e-01, + "cpu_time": 2.7471746855590068e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 102077, - "real_time": 6.8577059577568673e-03, - "cpu_time": 6.8574325166296029e-03, + "iterations": 101120, + "real_time": 6.9243410864514828e-03, + "cpu_time": 6.9239005241297502e-03, "time_unit": "ms" }, { @@ -157,9 +157,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 48376, - "real_time": 1.4470884157829843e-02, - "cpu_time": 1.4470130291880297e-02, + "iterations": 47365, + "real_time": 1.4754662157099186e-02, + "cpu_time": 1.4753382729863838e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2942, - "real_time": 2.3797876818864058e-01, - "cpu_time": 2.3796595377294330e-01, + "iterations": 2236, + "real_time": 3.0625470928941090e-01, + "cpu_time": 3.0622646198568843e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2939, - "real_time": 2.4021909594049093e-01, - "cpu_time": 2.4021315787682859e-01, + "iterations": 2168, + "real_time": 2.9962818306695493e-01, + "cpu_time": 2.9958592850553478e-01, 
"time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1345, - "real_time": 4.9373165991669693e-01, - "cpu_time": 4.9368963048327069e-01, + "iterations": 1149, + "real_time": 5.4589507240539437e-01, + "cpu_time": 5.4582348563968719e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1304, - "real_time": 4.6933880875721301e-01, - "cpu_time": 4.6931679907975549e-01, + "iterations": 1158, + "real_time": 4.3890949512392746e-01, + "cpu_time": 4.3878794473229726e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 602, - "real_time": 1.1125180523755938e+00, - "cpu_time": 1.1124859534883720e+00, + "iterations": 546, + "real_time": 9.8986280979690966e-01, + "cpu_time": 9.8963964468864618e-01, "time_unit": "ms" }, { @@ -241,9 +241,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 575, - "real_time": 1.1323607032713683e+00, - "cpu_time": 1.1323044973913052e+00, + "iterations": 707, + "real_time": 9.5082524916238798e-01, + "cpu_time": 9.5070903111739746e-01, "time_unit": "ms" }, { @@ -255,9 +255,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4821, - "real_time": 1.4600104651765639e-01, - "cpu_time": 1.4599840883634121e-01, + "iterations": 4900, + "real_time": 1.4259748845076076e-01, + "cpu_time": 1.4257857632653057e-01, "time_unit": "ms" }, { @@ -269,9 +269,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2876, - "real_time": 2.3280639558906183e-01, - "cpu_time": 2.3279589916550708e-01, + "iterations": 2916, + "real_time": 2.3652738105858304e-01, + "cpu_time": 2.3650978943758605e-01, "time_unit": "ms" }, { @@ -283,9 +283,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3193, - "real_time": 2.3576448068624747e-01, - "cpu_time": 2.3575711556529910e-01, + "iterations": 2906, + 
"real_time": 2.4044473193385890e-01, + "cpu_time": 2.4042902030282134e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2578, - "real_time": 2.7489120428232927e-01, - "cpu_time": 2.7488053335919271e-01, + "iterations": 2393, + "real_time": 2.8040418241794168e-01, + "cpu_time": 2.8036821521103100e-01, "time_unit": "ms" }, { @@ -311,9 +311,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2558, - "real_time": 2.6976516830026404e-01, - "cpu_time": 2.6975646051602908e-01, + "iterations": 2495, + "real_time": 2.7553062579913701e-01, + "cpu_time": 2.7551413947895820e-01, "time_unit": "ms" }, { @@ -325,9 +325,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2613, - "real_time": 2.6876007346097786e-01, - "cpu_time": 2.6874270340604567e-01, + "iterations": 2554, + "real_time": 2.7534571463882501e-01, + "cpu_time": 2.7532446162881763e-01, "time_unit": "ms" }, { @@ -339,9 +339,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4816, - "real_time": 1.5157200256826672e-01, - "cpu_time": 1.5156344622092988e-01, + "iterations": 4820, + "real_time": 1.4465537957878033e-01, + "cpu_time": 1.4464847676348577e-01, "time_unit": "ms" } ] diff --git a/test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log b/test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log index 92457329..b98f2736 100644 --- a/test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log +++ b/test_result/imageprocessing/SSE_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log @@ -1,4 +1,4 @@ -2025-09-07T13:04:57+00:00 +2025-09-07T14:21:20+00:00 Running ./bin/image-processing-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,33 +6,33 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 
30720 KiB (x1) -Load Average: 3.30, 5.12, 6.58 +Load Average: 3.26, 4.14, 5.10 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. -------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------------------- Eigen_Convolve2D/1 12.1 ms 12.1 ms 58 -MLIR_Conv2D/1 30.4 ms 30.4 ms 23 -Buddy_Conv2D/1 3.29 ms 3.29 ms 213 -Buddy_Corr2D_Constant_Padding/1 2.42 ms 2.42 ms 288 -OpenCV_Filter2D_Constant_Padding/1 4.33 ms 4.33 ms 162 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.150 ms 0.150 ms 4631 -Buddy_Resize2D_Bilinear_Interpolation/1 0.275 ms 0.275 ms 2549 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 102077 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 48376 -Buddy_Erosion2D_Constant_Padding/1 0.238 ms 0.238 ms 2942 -Buddy_Dilation2D_Constant_Padding/1 0.240 ms 0.240 ms 2939 -Buddy_Opening2D_Constant_Padding/1 0.494 ms 0.494 ms 1345 -Buddy_Closing2D_Constant_Padding/1 0.469 ms 0.469 ms 1304 -Buddy_TopHat2D_Constant_Padding/1 1.11 ms 1.11 ms 602 -Buddy_BottomHat2D_Constant_Padding/1 1.13 ms 1.13 ms 575 -OpenCV_Erode2D_Constant_Padding/1 0.146 ms 0.146 ms 4821 -OpenCV_Opening2D_Constant_Padding/1 0.233 ms 0.233 ms 2876 -OpenCV_Closing2D_Constant_Padding/1 0.236 ms 0.236 ms 3193 -OpenCV_TopHat2D_Constant_Padding/1 0.275 ms 0.275 ms 2578 -OpenCV_BottomHat2D_Constant_Padding/1 0.270 ms 0.270 ms 2558 -OpenCV_MorphGrad2D_Constant_Padding/1 0.269 ms 0.269 ms 2613 -OpenCV_Dilate2D_Constant_Padding/1 0.152 ms 0.152 ms 4816 +MLIR_Conv2D/1 30.5 ms 30.5 ms 23 +Buddy_Conv2D/1 3.32 ms 3.32 ms 210 +Buddy_Corr2D_Constant_Padding/1 2.46 ms 2.46 ms 284 +OpenCV_Filter2D_Constant_Padding/1 4.30 ms 4.30 ms 163 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.149 ms 0.149 ms 4709 +Buddy_Resize2D_Bilinear_Interpolation/1 0.275 ms 0.275 ms 2576 
+OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 101120 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47365 +Buddy_Erosion2D_Constant_Padding/1 0.306 ms 0.306 ms 2236 +Buddy_Dilation2D_Constant_Padding/1 0.300 ms 0.300 ms 2168 +Buddy_Opening2D_Constant_Padding/1 0.546 ms 0.546 ms 1149 +Buddy_Closing2D_Constant_Padding/1 0.439 ms 0.439 ms 1158 +Buddy_TopHat2D_Constant_Padding/1 0.990 ms 0.990 ms 546 +Buddy_BottomHat2D_Constant_Padding/1 0.951 ms 0.951 ms 707 +OpenCV_Erode2D_Constant_Padding/1 0.143 ms 0.143 ms 4900 +OpenCV_Opening2D_Constant_Padding/1 0.237 ms 0.237 ms 2916 +OpenCV_Closing2D_Constant_Padding/1 0.240 ms 0.240 ms 2906 +OpenCV_TopHat2D_Constant_Padding/1 0.280 ms 0.280 ms 2393 +OpenCV_BottomHat2D_Constant_Padding/1 0.276 ms 0.276 ms 2495 +OpenCV_MorphGrad2D_Constant_Padding/1 0.275 ms 0.275 ms 2554 +OpenCV_Dilate2D_Constant_Padding/1 0.145 ms 0.145 ms 4820 Saved PNG file. Saved PNG file. Saved PNG file. diff --git a/test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json index 740cc205..b48f7511 100644 --- a/test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json +++ b/test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-09-07T13:05:21+00:00", + "date": "2025-09-07T14:21:44+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [3.19775,4.94678,6.48145], + "load_avg": [3.1709,4.04785,5.04785], "library_build_type": "release" }, "benchmarks": [ @@ -46,8 +46,8 @@ "repetition_index": 0, "threads": 1, "iterations": 31, - "real_time": 2.2834600099632816e+01, - "cpu_time": 2.2833829258064519e+01, + "real_time": 
2.2453694574294552e+01, + "cpu_time": 2.2453529516129038e+01, "time_unit": "ms" }, { @@ -60,8 +60,8 @@ "repetition_index": 0, "threads": 1, "iterations": 10, - "real_time": 7.0026343688368797e+01, - "cpu_time": 7.0024338400000005e+01, + "real_time": 6.9538136571645737e+01, + "cpu_time": 6.9535859299999998e+01, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 104, - "real_time": 6.7405158367294531e+00, - "cpu_time": 6.7402173653846171e+00, + "iterations": 105, + "real_time": 6.5831756662754781e+00, + "cpu_time": 6.5830738380952374e+00, "time_unit": "ms" }, { @@ -87,9 +87,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 137, - "real_time": 5.0721328134519341e+00, - "cpu_time": 5.0720212627737213e+00, + "iterations": 145, + "real_time": 4.8232713906929412e+00, + "cpu_time": 4.8231763103448246e+00, "time_unit": "ms" }, { @@ -101,9 +101,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 77, - "real_time": 9.1429674489931632e+00, - "cpu_time": 9.1425494025974068e+00, + "iterations": 78, + "real_time": 8.9926493760102826e+00, + "cpu_time": 8.9925164102564121e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4577, - "real_time": 1.5241156727147201e-01, - "cpu_time": 1.5240754642779097e-01, + "iterations": 4719, + "real_time": 1.4828112111376565e-01, + "cpu_time": 1.4827689298580210e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2532, - "real_time": 2.7672113412270238e-01, - "cpu_time": 2.7670993601895744e-01, + "iterations": 2578, + "real_time": 2.7298830056532675e-01, + "cpu_time": 2.7298321722265312e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 100828, - "real_time": 6.9398609502726869e-03, - "cpu_time": 6.9397027016304957e-03, + "iterations": 
101158, + "real_time": 6.9221397090508292e-03, + "cpu_time": 6.9218924257102776e-03, "time_unit": "ms" }, { @@ -157,9 +157,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 47890, - "real_time": 1.4613397414785359e-02, - "cpu_time": 1.4612937857590293e-02, + "iterations": 47881, + "real_time": 1.4618278297898276e-02, + "cpu_time": 1.4618093627952623e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2591, - "real_time": 2.6190706497451505e-01, - "cpu_time": 2.6189954303357776e-01, + "iterations": 2555, + "real_time": 2.8827243325994906e-01, + "cpu_time": 2.8826175381604680e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2615, - "real_time": 2.7797788694985281e-01, - "cpu_time": 2.7796140305927314e-01, + "iterations": 2332, + "real_time": 2.7560231980338923e-01, + "cpu_time": 2.7559355960548881e-01, "time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1542, - "real_time": 4.6680531139580966e-01, - "cpu_time": 4.6677065175097338e-01, + "iterations": 1208, + "real_time": 5.0620788042217690e-01, + "cpu_time": 5.0618569039735117e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1395, - "real_time": 4.5463244387325846e-01, - "cpu_time": 4.5455844802867373e-01, + "iterations": 1084, + "real_time": 5.1128306182108241e-01, + "cpu_time": 5.1127205996309932e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 630, - "real_time": 1.0993280640197178e+00, - "cpu_time": 1.0992607015872999e+00, + "iterations": 712, + "real_time": 9.4518835101737064e-01, + "cpu_time": 9.4518094662921193e-01, "time_unit": "ms" }, { @@ -241,9 +241,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 617, - "real_time": 
1.1069473827097751e+00, - "cpu_time": 1.1068412495948152e+00, + "iterations": 712, + "real_time": 9.4133523407946806e-01, + "cpu_time": 9.4132780337078559e-01, "time_unit": "ms" }, { @@ -255,9 +255,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4809, - "real_time": 1.4524754655041400e-01, - "cpu_time": 1.4524285527136649e-01, + "iterations": 4962, + "real_time": 1.4084929968983068e-01, + "cpu_time": 1.4084782728738404e-01, "time_unit": "ms" }, { @@ -269,9 +269,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3025, - "real_time": 2.3007626006425905e-01, - "cpu_time": 2.3006703537190112e-01, + "iterations": 2964, + "real_time": 2.3630762148482598e-01, + "cpu_time": 2.3630250944669354e-01, "time_unit": "ms" }, { @@ -283,9 +283,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3022, - "real_time": 2.2336854805897435e-01, - "cpu_time": 2.2336302812706782e-01, + "iterations": 2964, + "real_time": 2.3484633740304131e-01, + "cpu_time": 2.3484455229419723e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2604, - "real_time": 2.6297588301922686e-01, - "cpu_time": 2.6296620890936956e-01, + "iterations": 2567, + "real_time": 2.7368364135114703e-01, + "cpu_time": 2.7367765095442176e-01, "time_unit": "ms" }, { @@ -311,9 +311,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2702, - "real_time": 2.5860768371295434e-01, - "cpu_time": 2.5860224167283508e-01, + "iterations": 2585, + "real_time": 2.7532173425132117e-01, + "cpu_time": 2.7531788704061916e-01, "time_unit": "ms" }, { @@ -325,9 +325,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2611, - "real_time": 2.7249755900694189e-01, - "cpu_time": 2.7247722826503162e-01, + "iterations": 2623, + "real_time": 2.6751557039114704e-01, + "cpu_time": 2.6751091498284435e-01, "time_unit": "ms" }, { @@ -339,9 +339,9 @@ "repetitions": 1, 
"repetition_index": 0, "threads": 1, - "iterations": 4746, - "real_time": 1.4528973452712332e-01, - "cpu_time": 1.4528467951959531e-01, + "iterations": 4903, + "real_time": 1.4305556644694115e-01, + "cpu_time": 1.4305444992861513e-01, "time_unit": "ms" } ] diff --git a/test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log b/test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log index b0234846..eada4cc4 100644 --- a/test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log +++ b/test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log @@ -1,4 +1,4 @@ -2025-09-07T13:05:21+00:00 +2025-09-07T14:21:44+00:00 Running ./bin/image-processing-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,33 +6,33 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 3.20, 4.95, 6.48 +Load Average: 3.17, 4.05, 5.05 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
-------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------------------- -Eigen_Convolve2D/1 22.8 ms 22.8 ms 31 -MLIR_Conv2D/1 70.0 ms 70.0 ms 10 -Buddy_Conv2D/1 6.74 ms 6.74 ms 104 -Buddy_Corr2D_Constant_Padding/1 5.07 ms 5.07 ms 137 -OpenCV_Filter2D_Constant_Padding/1 9.14 ms 9.14 ms 77 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.152 ms 0.152 ms 4577 -Buddy_Resize2D_Bilinear_Interpolation/1 0.277 ms 0.277 ms 2532 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 100828 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47890 -Buddy_Erosion2D_Constant_Padding/1 0.262 ms 0.262 ms 2591 -Buddy_Dilation2D_Constant_Padding/1 0.278 ms 0.278 ms 2615 -Buddy_Opening2D_Constant_Padding/1 0.467 ms 0.467 ms 1542 -Buddy_Closing2D_Constant_Padding/1 0.455 ms 0.455 ms 1395 -Buddy_TopHat2D_Constant_Padding/1 1.10 ms 1.10 ms 630 -Buddy_BottomHat2D_Constant_Padding/1 1.11 ms 1.11 ms 617 -OpenCV_Erode2D_Constant_Padding/1 0.145 ms 0.145 ms 4809 -OpenCV_Opening2D_Constant_Padding/1 0.230 ms 0.230 ms 3025 -OpenCV_Closing2D_Constant_Padding/1 0.223 ms 0.223 ms 3022 -OpenCV_TopHat2D_Constant_Padding/1 0.263 ms 0.263 ms 2604 -OpenCV_BottomHat2D_Constant_Padding/1 0.259 ms 0.259 ms 2702 -OpenCV_MorphGrad2D_Constant_Padding/1 0.272 ms 0.272 ms 2611 -OpenCV_Dilate2D_Constant_Padding/1 0.145 ms 0.145 ms 4746 +Eigen_Convolve2D/1 22.5 ms 22.5 ms 31 +MLIR_Conv2D/1 69.5 ms 69.5 ms 10 +Buddy_Conv2D/1 6.58 ms 6.58 ms 105 +Buddy_Corr2D_Constant_Padding/1 4.82 ms 4.82 ms 145 +OpenCV_Filter2D_Constant_Padding/1 8.99 ms 8.99 ms 78 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.148 ms 0.148 ms 4719 +Buddy_Resize2D_Bilinear_Interpolation/1 0.273 ms 0.273 ms 2578 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 101158 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47881 
+Buddy_Erosion2D_Constant_Padding/1 0.288 ms 0.288 ms 2555 +Buddy_Dilation2D_Constant_Padding/1 0.276 ms 0.276 ms 2332 +Buddy_Opening2D_Constant_Padding/1 0.506 ms 0.506 ms 1208 +Buddy_Closing2D_Constant_Padding/1 0.511 ms 0.511 ms 1084 +Buddy_TopHat2D_Constant_Padding/1 0.945 ms 0.945 ms 712 +Buddy_BottomHat2D_Constant_Padding/1 0.941 ms 0.941 ms 712 +OpenCV_Erode2D_Constant_Padding/1 0.141 ms 0.141 ms 4962 +OpenCV_Opening2D_Constant_Padding/1 0.236 ms 0.236 ms 2964 +OpenCV_Closing2D_Constant_Padding/1 0.235 ms 0.235 ms 2964 +OpenCV_TopHat2D_Constant_Padding/1 0.274 ms 0.274 ms 2567 +OpenCV_BottomHat2D_Constant_Padding/1 0.275 ms 0.275 ms 2585 +OpenCV_MorphGrad2D_Constant_Padding/1 0.268 ms 0.268 ms 2623 +OpenCV_Dilate2D_Constant_Padding/1 0.143 ms 0.143 ms 4903 Saved PNG file. Saved PNG file. Saved PNG file. diff --git a/test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json index db6a9c70..6a4e41aa 100644 --- a/test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json +++ b/test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-09-07T13:05:44+00:00", + "date": "2025-09-07T14:22:08+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [3.42627,4.88428,6.42773], + "load_avg": [3.12158,3.979,5.00293], "library_build_type": "release" }, "benchmarks": [ @@ -46,8 +46,8 @@ "repetition_index": 0, "threads": 1, "iterations": 31, - "real_time": 2.2537152012509683e+01, - "cpu_time": 2.2535636096774191e+01, + "real_time": 2.2618762908443326e+01, + "cpu_time": 2.2618371935483868e+01, "time_unit": "ms" }, { @@ -60,8 +60,8 @@ "repetition_index": 0, "threads": 1, "iterations": 10, - 
"real_time": 6.9293748959898949e+01, - "cpu_time": 6.9290900800000017e+01, + "real_time": 6.9524862989783287e+01, + "cpu_time": 6.9523626999999991e+01, "time_unit": "ms" }, { @@ -74,8 +74,8 @@ "repetition_index": 0, "threads": 1, "iterations": 106, - "real_time": 6.7181873996302768e+00, - "cpu_time": 6.7177365471698129e+00, + "real_time": 6.5605414433861675e+00, + "cpu_time": 6.5604865094339591e+00, "time_unit": "ms" }, { @@ -88,8 +88,8 @@ "repetition_index": 0, "threads": 1, "iterations": 145, - "real_time": 4.8399442999527373e+00, - "cpu_time": 4.8398396551724145e+00, + "real_time": 4.8053482226256667e+00, + "cpu_time": 4.8052233655172412e+00, "time_unit": "ms" }, { @@ -101,9 +101,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 78, - "real_time": 9.0141488382449513e+00, - "cpu_time": 9.0134768974358952e+00, + "iterations": 77, + "real_time": 8.9911930263042450e+00, + "cpu_time": 8.9911225194805233e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4580, - "real_time": 1.5031098018046549e-01, - "cpu_time": 1.5030253820960690e-01, + "iterations": 4701, + "real_time": 1.4843348191028402e-01, + "cpu_time": 1.4843221718783239e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2544, - "real_time": 2.9538726322821474e-01, - "cpu_time": 2.9536609433962269e-01, + "iterations": 2581, + "real_time": 2.7286108469187126e-01, + "cpu_time": 2.7285397791553656e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 103180, - "real_time": 6.8470672492163889e-03, - "cpu_time": 6.8469026070944036e-03, + "iterations": 100977, + "real_time": 6.9243114111851491e-03, + "cpu_time": 6.9242313695197901e-03, "time_unit": "ms" }, { @@ -157,9 +157,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 48350, - "real_time": 1.4463258982321099e-02, - 
"cpu_time": 1.4462680806618422e-02, + "iterations": 47816, + "real_time": 1.4641848158278912e-02, + "cpu_time": 1.4641626714907138e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2630, - "real_time": 2.6526380146184347e-01, - "cpu_time": 2.6525710494296539e-01, + "iterations": 2338, + "real_time": 3.2029897643777427e-01, + "cpu_time": 3.2029361420017116e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2560, - "real_time": 2.7432749484432861e-01, - "cpu_time": 2.7431482539062507e-01, + "iterations": 2051, + "real_time": 3.0370511982802473e-01, + "cpu_time": 3.0369885811799063e-01, "time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1570, - "real_time": 4.5010410961072156e-01, - "cpu_time": 4.5009547324840748e-01, + "iterations": 1078, + "real_time": 5.8860488664462529e-01, + "cpu_time": 5.8859645361780988e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1546, - "real_time": 4.4200593969491908e-01, - "cpu_time": 4.4199154010349267e-01, + "iterations": 1041, + "real_time": 5.0554771645497409e-01, + "cpu_time": 5.0554403746397636e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 646, - "real_time": 1.0057703848501478e+00, - "cpu_time": 1.0057355030959745e+00, + "iterations": 676, + "real_time": 9.8800656424295263e-01, + "cpu_time": 9.8798956065088772e-01, "time_unit": "ms" }, { @@ -241,9 +241,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 660, - "real_time": 1.0330350335800287e+00, - "cpu_time": 1.0329946303030324e+00, + "iterations": 681, + "real_time": 9.9229560440157305e-01, + "cpu_time": 9.9226728928046803e-01, "time_unit": "ms" }, { @@ -255,9 +255,9 @@ "repetitions": 1, "repetition_index": 0, 
"threads": 1, - "iterations": 4837, - "real_time": 1.4295056154221722e-01, - "cpu_time": 1.4294818709944210e-01, + "iterations": 4906, + "real_time": 1.4288953546840125e-01, + "cpu_time": 1.4288746534855290e-01, "time_unit": "ms" }, { @@ -269,9 +269,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3029, - "real_time": 2.3049620383247837e-01, - "cpu_time": 2.3049112611422914e-01, + "iterations": 2934, + "real_time": 2.3779297538289587e-01, + "cpu_time": 2.3778759066121333e-01, "time_unit": "ms" }, { @@ -283,9 +283,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3041, - "real_time": 2.2969253922284963e-01, - "cpu_time": 2.2968828707661890e-01, + "iterations": 2930, + "real_time": 2.3896080591165977e-01, + "cpu_time": 2.3895895563139949e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2631, - "real_time": 2.6287287417041716e-01, - "cpu_time": 2.6285376358798923e-01, + "iterations": 2536, + "real_time": 2.7688780405825997e-01, + "cpu_time": 2.7688168966877036e-01, "time_unit": "ms" }, { @@ -311,9 +311,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2545, - "real_time": 2.6360817092575117e-01, - "cpu_time": 2.6360576895874238e-01, + "iterations": 2520, + "real_time": 2.7749545369592926e-01, + "cpu_time": 2.7749185238095248e-01, "time_unit": "ms" }, { @@ -325,9 +325,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2557, - "real_time": 2.7465159900262109e-01, - "cpu_time": 2.7463849980445809e-01, + "iterations": 2580, + "real_time": 2.7239541126083033e-01, + "cpu_time": 2.7239121627906959e-01, "time_unit": "ms" }, { @@ -339,9 +339,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4852, - "real_time": 1.4438946414293580e-01, - "cpu_time": 1.4438811150041275e-01, + "iterations": 4911, + "real_time": 1.4269597732809600e-01, + "cpu_time": 1.4269491427407904e-01, 
"time_unit": "ms" } ] diff --git a/test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log b/test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log index 458ce4ce..d80f7666 100644 --- a/test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log +++ b/test_result/imageprocessing/SSE_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log @@ -1,4 +1,4 @@ -2025-09-07T13:05:44+00:00 +2025-09-07T14:22:08+00:00 Running ./bin/image-processing-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,33 +6,33 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 3.43, 4.88, 6.43 +Load Average: 3.12, 3.98, 5.00 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. -------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------------------- -Eigen_Convolve2D/1 22.5 ms 22.5 ms 31 -MLIR_Conv2D/1 69.3 ms 69.3 ms 10 -Buddy_Conv2D/1 6.72 ms 6.72 ms 106 -Buddy_Corr2D_Constant_Padding/1 4.84 ms 4.84 ms 145 -OpenCV_Filter2D_Constant_Padding/1 9.01 ms 9.01 ms 78 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.150 ms 0.150 ms 4580 -Buddy_Resize2D_Bilinear_Interpolation/1 0.295 ms 0.295 ms 2544 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 103180 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 48350 -Buddy_Erosion2D_Constant_Padding/1 0.265 ms 0.265 ms 2630 -Buddy_Dilation2D_Constant_Padding/1 0.274 ms 0.274 ms 2560 -Buddy_Opening2D_Constant_Padding/1 0.450 ms 0.450 ms 1570 -Buddy_Closing2D_Constant_Padding/1 0.442 ms 0.442 ms 1546 -Buddy_TopHat2D_Constant_Padding/1 1.01 ms 1.01 ms 646 -Buddy_BottomHat2D_Constant_Padding/1 1.03 ms 1.03 ms 660 
-OpenCV_Erode2D_Constant_Padding/1 0.143 ms 0.143 ms 4837 -OpenCV_Opening2D_Constant_Padding/1 0.230 ms 0.230 ms 3029 -OpenCV_Closing2D_Constant_Padding/1 0.230 ms 0.230 ms 3041 -OpenCV_TopHat2D_Constant_Padding/1 0.263 ms 0.263 ms 2631 -OpenCV_BottomHat2D_Constant_Padding/1 0.264 ms 0.264 ms 2545 -OpenCV_MorphGrad2D_Constant_Padding/1 0.275 ms 0.275 ms 2557 -OpenCV_Dilate2D_Constant_Padding/1 0.144 ms 0.144 ms 4852 +Eigen_Convolve2D/1 22.6 ms 22.6 ms 31 +MLIR_Conv2D/1 69.5 ms 69.5 ms 10 +Buddy_Conv2D/1 6.56 ms 6.56 ms 106 +Buddy_Corr2D_Constant_Padding/1 4.81 ms 4.81 ms 145 +OpenCV_Filter2D_Constant_Padding/1 8.99 ms 8.99 ms 77 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.148 ms 0.148 ms 4701 +Buddy_Resize2D_Bilinear_Interpolation/1 0.273 ms 0.273 ms 2581 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 100977 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47816 +Buddy_Erosion2D_Constant_Padding/1 0.320 ms 0.320 ms 2338 +Buddy_Dilation2D_Constant_Padding/1 0.304 ms 0.304 ms 2051 +Buddy_Opening2D_Constant_Padding/1 0.589 ms 0.589 ms 1078 +Buddy_Closing2D_Constant_Padding/1 0.506 ms 0.506 ms 1041 +Buddy_TopHat2D_Constant_Padding/1 0.988 ms 0.988 ms 676 +Buddy_BottomHat2D_Constant_Padding/1 0.992 ms 0.992 ms 681 +OpenCV_Erode2D_Constant_Padding/1 0.143 ms 0.143 ms 4906 +OpenCV_Opening2D_Constant_Padding/1 0.238 ms 0.238 ms 2934 +OpenCV_Closing2D_Constant_Padding/1 0.239 ms 0.239 ms 2930 +OpenCV_TopHat2D_Constant_Padding/1 0.277 ms 0.277 ms 2536 +OpenCV_BottomHat2D_Constant_Padding/1 0.277 ms 0.277 ms 2520 +OpenCV_MorphGrad2D_Constant_Padding/1 0.272 ms 0.272 ms 2580 +OpenCV_Dilate2D_Constant_Padding/1 0.143 ms 0.143 ms 4911 Saved PNG file. Saved PNG file. Saved PNG file. 
diff --git a/test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json index 71e3e968..c44dc8c1 100644 --- a/test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json +++ b/test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-09-07T13:06:09+00:00", + "date": "2025-09-07T14:22:31+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [3.2749,4.73193,6.33545], + "load_avg": [3.0791,3.89893,4.94873], "library_build_type": "release" }, "benchmarks": [ @@ -46,8 +46,8 @@ "repetition_index": 0, "threads": 1, "iterations": 19, - "real_time": 3.5934302563730036e+01, - "cpu_time": 3.5933023947368419e+01, + "real_time": 3.6331885739376673e+01, + "cpu_time": 3.6330795842105260e+01, "time_unit": "ms" }, { @@ -60,8 +60,8 @@ "repetition_index": 0, "threads": 1, "iterations": 6, - "real_time": 1.2292776505152385e+02, - "cpu_time": 1.2292239783333336e+02, + "real_time": 1.2376248215635617e+02, + "cpu_time": 1.2375778916666665e+02, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 62, - "real_time": 1.1396257747565546e+01, - "cpu_time": 1.1395874725806449e+01, + "iterations": 61, + "real_time": 1.1424725783652947e+01, + "cpu_time": 1.1424135098360654e+01, "time_unit": "ms" }, { @@ -87,9 +87,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 85, - "real_time": 8.2714001045507537e+00, - "cpu_time": 8.2711988235294083e+00, + "iterations": 86, + "real_time": 8.1076199232145800e+00, + "cpu_time": 8.1065335465116259e+00, "time_unit": "ms" }, { @@ -101,9 +101,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 
1, - "iterations": 115, - "real_time": 6.0265639877837636e+00, - "cpu_time": 6.0262306782608634e+00, + "iterations": 116, + "real_time": 6.0680480673909187e+00, + "cpu_time": 6.0677083189655177e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4572, - "real_time": 1.5129561903491318e-01, - "cpu_time": 1.5129300874890639e-01, + "iterations": 4691, + "real_time": 1.4930519832651948e-01, + "cpu_time": 1.4926992773395867e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2518, - "real_time": 2.7770467096609763e-01, - "cpu_time": 2.7769441262907074e-01, + "iterations": 2573, + "real_time": 2.7582971932959383e-01, + "cpu_time": 2.7580277924601621e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 100684, - "real_time": 6.9422946241566785e-03, - "cpu_time": 6.9421268324659320e-03, + "iterations": 100634, + "real_time": 6.9968619756626393e-03, + "cpu_time": 6.9965185225669300e-03, "time_unit": "ms" }, { @@ -157,9 +157,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 48008, - "real_time": 1.4623689299632144e-02, - "cpu_time": 1.4622668409431758e-02, + "iterations": 43490, + "real_time": 1.4810458127178799e-02, + "cpu_time": 1.4809136491147392e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2923, - "real_time": 2.4639664194251917e-01, - "cpu_time": 2.4638985083818007e-01, + "iterations": 2224, + "real_time": 3.1000987975020633e-01, + "cpu_time": 3.0994578911870491e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2623, - "real_time": 2.4554658904131979e-01, - "cpu_time": 2.4553771406786179e-01, + "iterations": 2187, + "real_time": 3.0433967416560098e-01, + "cpu_time": 3.0430470416095112e-01, "time_unit": "ms" 
}, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1783, - "real_time": 4.1326654179672545e-01, - "cpu_time": 4.1323844083006184e-01, + "iterations": 1079, + "real_time": 5.5283736834601194e-01, + "cpu_time": 5.5278239110287286e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1718, - "real_time": 4.1503581512238008e-01, - "cpu_time": 4.1502305005820794e-01, + "iterations": 1084, + "real_time": 4.6356722740009704e-01, + "cpu_time": 4.6354099538745458e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 651, - "real_time": 1.0203976990989825e+00, - "cpu_time": 1.0203406251920124e+00, + "iterations": 671, + "real_time": 9.9508046837393116e-01, + "cpu_time": 9.9501271982116168e-01, "time_unit": "ms" }, { @@ -241,9 +241,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 631, - "real_time": 1.1531298245095225e+00, - "cpu_time": 1.1530850871632337e+00, + "iterations": 694, + "real_time": 9.6488717717980099e-01, + "cpu_time": 9.6476614553314033e-01, "time_unit": "ms" }, { @@ -255,9 +255,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4907, - "real_time": 1.4306778562319164e-01, - "cpu_time": 1.4306101080089656e-01, + "iterations": 4872, + "real_time": 1.4356119720317265e-01, + "cpu_time": 1.4355267775041070e-01, "time_unit": "ms" }, { @@ -269,9 +269,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3140, - "real_time": 2.2325247858360314e-01, - "cpu_time": 2.2324565222929896e-01, + "iterations": 3052, + "real_time": 2.3091042616928234e-01, + "cpu_time": 2.3088996100917433e-01, "time_unit": "ms" }, { @@ -283,9 +283,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3122, - "real_time": 2.2345905423088000e-01, - "cpu_time": 2.2345698334401021e-01, + "iterations": 3055, + "real_time": 
2.2933373499033688e-01, + "cpu_time": 2.2932497119476267e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2724, - "real_time": 2.6043242174535769e-01, - "cpu_time": 2.6042061894273144e-01, + "iterations": 2601, + "real_time": 2.6950421458434620e-01, + "cpu_time": 2.6948042560553648e-01, "time_unit": "ms" }, { @@ -311,9 +311,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2683, - "real_time": 2.5787628267933704e-01, - "cpu_time": 2.5786502795378330e-01, + "iterations": 2655, + "real_time": 2.6802309769470589e-01, + "cpu_time": 2.6800724218455829e-01, "time_unit": "ms" }, { @@ -325,9 +325,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2682, - "real_time": 2.6200129459546095e-01, - "cpu_time": 2.6198928560775564e-01, + "iterations": 2674, + "real_time": 2.6259915979834036e-01, + "cpu_time": 2.6257767726252695e-01, "time_unit": "ms" }, { @@ -339,9 +339,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4938, - "real_time": 1.4114592551774657e-01, - "cpu_time": 1.4114345787768337e-01, + "iterations": 5002, + "real_time": 1.4069610041101091e-01, + "cpu_time": 1.4068704078368685e-01, "time_unit": "ms" } ] diff --git a/test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log b/test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log index 46a4f823..9d3be0fe 100644 --- a/test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log +++ b/test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log @@ -1,4 +1,4 @@ -2025-09-07T13:06:09+00:00 +2025-09-07T14:22:31+00:00 Running ./bin/image-processing-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,33 +6,33 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) 
-Load Average: 3.27, 4.73, 6.34 +Load Average: 3.08, 3.90, 4.95 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. -------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------------------- -Eigen_Convolve2D/1 35.9 ms 35.9 ms 19 -MLIR_Conv2D/1 123 ms 123 ms 6 -Buddy_Conv2D/1 11.4 ms 11.4 ms 62 -Buddy_Corr2D_Constant_Padding/1 8.27 ms 8.27 ms 85 -OpenCV_Filter2D_Constant_Padding/1 6.03 ms 6.03 ms 115 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.151 ms 0.151 ms 4572 -Buddy_Resize2D_Bilinear_Interpolation/1 0.278 ms 0.278 ms 2518 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 100684 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 48008 -Buddy_Erosion2D_Constant_Padding/1 0.246 ms 0.246 ms 2923 -Buddy_Dilation2D_Constant_Padding/1 0.246 ms 0.246 ms 2623 -Buddy_Opening2D_Constant_Padding/1 0.413 ms 0.413 ms 1783 -Buddy_Closing2D_Constant_Padding/1 0.415 ms 0.415 ms 1718 -Buddy_TopHat2D_Constant_Padding/1 1.02 ms 1.02 ms 651 -Buddy_BottomHat2D_Constant_Padding/1 1.15 ms 1.15 ms 631 -OpenCV_Erode2D_Constant_Padding/1 0.143 ms 0.143 ms 4907 -OpenCV_Opening2D_Constant_Padding/1 0.223 ms 0.223 ms 3140 -OpenCV_Closing2D_Constant_Padding/1 0.223 ms 0.223 ms 3122 -OpenCV_TopHat2D_Constant_Padding/1 0.260 ms 0.260 ms 2724 -OpenCV_BottomHat2D_Constant_Padding/1 0.258 ms 0.258 ms 2683 -OpenCV_MorphGrad2D_Constant_Padding/1 0.262 ms 0.262 ms 2682 -OpenCV_Dilate2D_Constant_Padding/1 0.141 ms 0.141 ms 4938 +Eigen_Convolve2D/1 36.3 ms 36.3 ms 19 +MLIR_Conv2D/1 124 ms 124 ms 6 +Buddy_Conv2D/1 11.4 ms 11.4 ms 61 +Buddy_Corr2D_Constant_Padding/1 8.11 ms 8.11 ms 86 +OpenCV_Filter2D_Constant_Padding/1 6.07 ms 6.07 ms 116 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.149 ms 0.149 ms 4691 +Buddy_Resize2D_Bilinear_Interpolation/1 0.276 ms 
0.276 ms 2573 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 100634 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 43490 +Buddy_Erosion2D_Constant_Padding/1 0.310 ms 0.310 ms 2224 +Buddy_Dilation2D_Constant_Padding/1 0.304 ms 0.304 ms 2187 +Buddy_Opening2D_Constant_Padding/1 0.553 ms 0.553 ms 1079 +Buddy_Closing2D_Constant_Padding/1 0.464 ms 0.464 ms 1084 +Buddy_TopHat2D_Constant_Padding/1 0.995 ms 0.995 ms 671 +Buddy_BottomHat2D_Constant_Padding/1 0.965 ms 0.965 ms 694 +OpenCV_Erode2D_Constant_Padding/1 0.144 ms 0.144 ms 4872 +OpenCV_Opening2D_Constant_Padding/1 0.231 ms 0.231 ms 3052 +OpenCV_Closing2D_Constant_Padding/1 0.229 ms 0.229 ms 3055 +OpenCV_TopHat2D_Constant_Padding/1 0.270 ms 0.269 ms 2601 +OpenCV_BottomHat2D_Constant_Padding/1 0.268 ms 0.268 ms 2655 +OpenCV_MorphGrad2D_Constant_Padding/1 0.263 ms 0.263 ms 2674 +OpenCV_Dilate2D_Constant_Padding/1 0.141 ms 0.141 ms 5002 Saved PNG file. Saved PNG file. Saved PNG file. diff --git a/test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json index f2528e67..84797d22 100644 --- a/test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json +++ b/test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-09-07T13:06:33+00:00", + "date": "2025-09-07T14:22:55+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [2.90137,4.52734,6.22461], + "load_avg": [3.05127,3.8252,4.89551], "library_build_type": "release" }, "benchmarks": [ @@ -46,8 +46,8 @@ "repetition_index": 0, "threads": 1, "iterations": 19, - "real_time": 3.6031454017287807e+01, - "cpu_time": 3.6030928210526312e+01, + "real_time": 
3.5804433257956255e+01, + "cpu_time": 3.5803924105263171e+01, "time_unit": "ms" }, { @@ -60,8 +60,8 @@ "repetition_index": 0, "threads": 1, "iterations": 6, - "real_time": 1.2390441261231899e+02, - "cpu_time": 1.2389855900000002e+02, + "real_time": 1.2386105209589005e+02, + "cpu_time": 1.2385802349999996e+02, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 61, - "real_time": 1.1435016989707947e+01, - "cpu_time": 1.1434898901639347e+01, + "iterations": 62, + "real_time": 1.1243964154874124e+01, + "cpu_time": 1.1243781112903225e+01, "time_unit": "ms" }, { @@ -88,8 +88,8 @@ "repetition_index": 0, "threads": 1, "iterations": 86, - "real_time": 8.0684243679739716e+00, - "cpu_time": 8.0681411860465122e+00, + "real_time": 8.1609252320472585e+00, + "cpu_time": 8.1606621744186025e+00, "time_unit": "ms" }, { @@ -101,9 +101,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 116, - "real_time": 6.0479930312982919e+00, - "cpu_time": 6.0477856465517252e+00, + "iterations": 115, + "real_time": 6.0534707230070364e+00, + "cpu_time": 6.0533277826086938e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4547, - "real_time": 1.4929789562316578e-01, - "cpu_time": 1.4929170332087077e-01, + "iterations": 4691, + "real_time": 1.4979387287060808e-01, + "cpu_time": 1.4979121743764645e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2567, - "real_time": 2.7209066542429267e-01, - "cpu_time": 2.7208513245033111e-01, + "iterations": 2560, + "real_time": 2.7500972501002252e-01, + "cpu_time": 2.7500316249999995e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 102031, - "real_time": 6.8424128531881564e-03, - "cpu_time": 6.8420196802932457e-03, + "iterations": 100967, + "real_time": 6.9533759701305943e-03, 
+ "cpu_time": 6.9528367090237403e-03, "time_unit": "ms" }, { @@ -157,9 +157,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 47780, - "real_time": 1.4569356260987295e-02, - "cpu_time": 1.4568733444956036e-02, + "iterations": 47401, + "real_time": 1.4758331940115305e-02, + "cpu_time": 1.4757342018100885e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2938, - "real_time": 2.3473830547480456e-01, - "cpu_time": 2.3473132573179031e-01, + "iterations": 2397, + "real_time": 3.0549748811962507e-01, + "cpu_time": 3.0548149436796035e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2922, - "real_time": 2.3591566211266685e-01, - "cpu_time": 2.3590406913073245e-01, + "iterations": 2198, + "real_time": 2.8026699375791697e-01, + "cpu_time": 2.8025402229299401e-01, "time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1840, - "real_time": 3.6312726164317649e-01, - "cpu_time": 3.6310779728260872e-01, + "iterations": 1071, + "real_time": 5.2040776743942285e-01, + "cpu_time": 5.2038497292250174e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1905, - "real_time": 3.7938127015519330e-01, - "cpu_time": 3.7936665511811024e-01, + "iterations": 1510, + "real_time": 3.8458729865929936e-01, + "cpu_time": 3.8456946887417259e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 672, - "real_time": 9.9142064296063925e-01, - "cpu_time": 9.9135187946428460e-01, + "iterations": 695, + "real_time": 9.8917052792988236e-01, + "cpu_time": 9.8913716402877794e-01, "time_unit": "ms" }, { @@ -242,8 +242,8 @@ "repetition_index": 0, "threads": 1, "iterations": 684, - "real_time": 9.8383305213081906e-01, - "cpu_time": 9.8378608333333095e-01, + 
"real_time": 9.6726054815869578e-01, + "cpu_time": 9.6718889619882953e-01, "time_unit": "ms" }, { @@ -255,9 +255,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4933, - "real_time": 1.4217775303718649e-01, - "cpu_time": 1.4216965781471705e-01, + "iterations": 4878, + "real_time": 1.4255527756701927e-01, + "cpu_time": 1.4255166113161119e-01, "time_unit": "ms" }, { @@ -269,9 +269,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3207, - "real_time": 2.2116656922498595e-01, - "cpu_time": 2.2115779700654803e-01, + "iterations": 3098, + "real_time": 2.2962313864483536e-01, + "cpu_time": 2.2960967947062669e-01, "time_unit": "ms" }, { @@ -283,9 +283,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3201, - "real_time": 2.1932033547867988e-01, - "cpu_time": 2.1931359012808557e-01, + "iterations": 3043, + "real_time": 2.3499211144024237e-01, + "cpu_time": 2.3498555570160989e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2623, - "real_time": 2.5874181724076412e-01, - "cpu_time": 2.5873152497140717e-01, + "iterations": 2619, + "real_time": 2.6389677824296764e-01, + "cpu_time": 2.6389314623902327e-01, "time_unit": "ms" }, { @@ -311,9 +311,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2746, - "real_time": 2.5197450893687579e-01, - "cpu_time": 2.5196135906773559e-01, + "iterations": 2682, + "real_time": 2.5938538102451142e-01, + "cpu_time": 2.5937977852349003e-01, "time_unit": "ms" }, { @@ -325,9 +325,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2650, - "real_time": 2.6740114503311663e-01, - "cpu_time": 2.6738492188679175e-01, + "iterations": 2671, + "real_time": 2.6242634423079897e-01, + "cpu_time": 2.6242075926619268e-01, "time_unit": "ms" }, { @@ -339,9 +339,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4842, - "real_time": 
1.4409735623419359e-01, - "cpu_time": 1.4409410636100756e-01, + "iterations": 4966, + "real_time": 1.4111212860947409e-01, + "cpu_time": 1.4111019734192456e-01, "time_unit": "ms" } ] diff --git a/test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log b/test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log index 34d10b59..cf470496 100644 --- a/test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log +++ b/test_result/imageprocessing/SSE_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log @@ -1,4 +1,4 @@ -2025-09-07T13:06:33+00:00 +2025-09-07T14:22:55+00:00 Running ./bin/image-processing-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,33 +6,33 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 2.90, 4.53, 6.22 +Load Average: 3.05, 3.83, 4.90 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
-------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------------------- -Eigen_Convolve2D/1 36.0 ms 36.0 ms 19 +Eigen_Convolve2D/1 35.8 ms 35.8 ms 19 MLIR_Conv2D/1 124 ms 124 ms 6 -Buddy_Conv2D/1 11.4 ms 11.4 ms 61 -Buddy_Corr2D_Constant_Padding/1 8.07 ms 8.07 ms 86 -OpenCV_Filter2D_Constant_Padding/1 6.05 ms 6.05 ms 116 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.149 ms 0.149 ms 4547 -Buddy_Resize2D_Bilinear_Interpolation/1 0.272 ms 0.272 ms 2567 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 102031 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47780 -Buddy_Erosion2D_Constant_Padding/1 0.235 ms 0.235 ms 2938 -Buddy_Dilation2D_Constant_Padding/1 0.236 ms 0.236 ms 2922 -Buddy_Opening2D_Constant_Padding/1 0.363 ms 0.363 ms 1840 -Buddy_Closing2D_Constant_Padding/1 0.379 ms 0.379 ms 1905 -Buddy_TopHat2D_Constant_Padding/1 0.991 ms 0.991 ms 672 -Buddy_BottomHat2D_Constant_Padding/1 0.984 ms 0.984 ms 684 -OpenCV_Erode2D_Constant_Padding/1 0.142 ms 0.142 ms 4933 -OpenCV_Opening2D_Constant_Padding/1 0.221 ms 0.221 ms 3207 -OpenCV_Closing2D_Constant_Padding/1 0.219 ms 0.219 ms 3201 -OpenCV_TopHat2D_Constant_Padding/1 0.259 ms 0.259 ms 2623 -OpenCV_BottomHat2D_Constant_Padding/1 0.252 ms 0.252 ms 2746 -OpenCV_MorphGrad2D_Constant_Padding/1 0.267 ms 0.267 ms 2650 -OpenCV_Dilate2D_Constant_Padding/1 0.144 ms 0.144 ms 4842 +Buddy_Conv2D/1 11.2 ms 11.2 ms 62 +Buddy_Corr2D_Constant_Padding/1 8.16 ms 8.16 ms 86 +OpenCV_Filter2D_Constant_Padding/1 6.05 ms 6.05 ms 115 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.150 ms 0.150 ms 4691 +Buddy_Resize2D_Bilinear_Interpolation/1 0.275 ms 0.275 ms 2560 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 100967 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47401 +Buddy_Erosion2D_Constant_Padding/1 0.305 ms 0.305 ms 
2397 +Buddy_Dilation2D_Constant_Padding/1 0.280 ms 0.280 ms 2198 +Buddy_Opening2D_Constant_Padding/1 0.520 ms 0.520 ms 1071 +Buddy_Closing2D_Constant_Padding/1 0.385 ms 0.385 ms 1510 +Buddy_TopHat2D_Constant_Padding/1 0.989 ms 0.989 ms 695 +Buddy_BottomHat2D_Constant_Padding/1 0.967 ms 0.967 ms 684 +OpenCV_Erode2D_Constant_Padding/1 0.143 ms 0.143 ms 4878 +OpenCV_Opening2D_Constant_Padding/1 0.230 ms 0.230 ms 3098 +OpenCV_Closing2D_Constant_Padding/1 0.235 ms 0.235 ms 3043 +OpenCV_TopHat2D_Constant_Padding/1 0.264 ms 0.264 ms 2619 +OpenCV_BottomHat2D_Constant_Padding/1 0.259 ms 0.259 ms 2682 +OpenCV_MorphGrad2D_Constant_Padding/1 0.262 ms 0.262 ms 2671 +OpenCV_Dilate2D_Constant_Padding/1 0.141 ms 0.141 ms 4966 Saved PNG file. Saved PNG file. Saved PNG file. diff --git a/test_result/imageprocessing/image-processing-result.log b/test_result/imageprocessing/image-processing-result.log index 00654642..b270fff5 100644 --- a/test_result/imageprocessing/image-processing-result.log +++ b/test_result/imageprocessing/image-processing-result.log @@ -1,4 +1,4 @@ -Benchmark results - Sun Sep 7 13:02:48 UTC 2025 +Benchmark results - Sun Sep 7 14:19:23 UTC 2025 Testing SSE support SSE is supported. 
Running image-processing-benchmark for SSE @@ -6,62 +6,44 @@ Running: ../benchmarks/ImageProcessing/Images/YuTu.png prewittKernelAlign random [Success] … Running: ../benchmarks/ImageProcessing/Images/YuTu.png prewittKernelAlign random3x3KernelAlignInt REPLICATE_PADDING [Success] … -Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel3x3KernelAlign random3x3KernelAlignInt CONSTANT_PADDING -[Success] … -Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel3x3KernelAlign random3x3KernelAlignInt REPLICATE_PADDING -[Success] … -Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel5x5KernelAlign random3x3KernelAlignInt CONSTANT_PADDING -[Success] … -Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel5x5KernelAlign random3x3KernelAlignInt REPLICATE_PADDING -[Success] … -Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel7x7KernelAlign random3x3KernelAlignInt CONSTANT_PADDING -[Success] … -Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel7x7KernelAlign random3x3KernelAlignInt REPLICATE_PADDING -[Success] … -Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel9x9KernelAlign random3x3KernelAlignInt CONSTANT_PADDING -[Success] … -Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel9x9KernelAlign random3x3KernelAlignInt REPLICATE_PADDING -[Success] … -Running: ../benchmarks/ImageProcessing/Images/YuTu.png laplacianKernelAlign random3x3KernelAlignInt CONSTANT_PADDING -[Success] … -Running: ../benchmarks/ImageProcessing/Images/YuTu.png laplacianKernelAlign random3x3KernelAlignInt REPLICATE_PADDING -[Success] … -Running: ../benchmarks/ImageProcessing/Images/YuTu.png logKernelAlign random3x3KernelAlignInt CONSTANT_PADDING -[Success] … -Running: ../benchmarks/ImageProcessing/Images/YuTu.png logKernelAlign random3x3KernelAlignInt REPLICATE_PADDING -[Success] … -Testing AVX2 support -AVX2 is supported. 
-Running image-processing-benchmark for AVX2 -Running: ../benchmarks/ImageProcessing/Images/YuTu.png prewittKernelAlign random3x3KernelAlignInt CONSTANT_PADDING -[Success] … Running: ../benchmarks/ImageProcessing/Images/YuTu.png prewittKernelAlign random3x3KernelAlignInt REPLICATE_PADDING [Success] … Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel3x3KernelAlign random3x3KernelAlignInt CONSTANT_PADDING [Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel3x3KernelAlign random3x3KernelAlignInt CONSTANT_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel3x3KernelAlign random3x3KernelAlignInt REPLICATE_PADDING +[Success] … Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel3x3KernelAlign random3x3KernelAlignInt REPLICATE_PADDING [Success] … Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel5x5KernelAlign random3x3KernelAlignInt CONSTANT_PADDING [Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel5x5KernelAlign random3x3KernelAlignInt CONSTANT_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel5x5KernelAlign random3x3KernelAlignInt REPLICATE_PADDING +[Success] … Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel5x5KernelAlign random3x3KernelAlignInt REPLICATE_PADDING [Success] … Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel7x7KernelAlign random3x3KernelAlignInt CONSTANT_PADDING [Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel7x7KernelAlign random3x3KernelAlignInt CONSTANT_PADDING +[Success] … Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel7x7KernelAlign random3x3KernelAlignInt REPLICATE_PADDING [Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel7x7KernelAlign random3x3KernelAlignInt REPLICATE_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel9x9KernelAlign random3x3KernelAlignInt CONSTANT_PADDING +[Success] … Running: 
../benchmarks/ImageProcessing/Images/YuTu.png sobel9x9KernelAlign random3x3KernelAlignInt CONSTANT_PADDING [Success] … Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel9x9KernelAlign random3x3KernelAlignInt REPLICATE_PADDING [Success] … -Running: ../benchmarks/ImageProcessing/Images/YuTu.png laplacianKernelAlign random3x3KernelAlignInt CONSTANT_PADDING +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel9x9KernelAlign random3x3KernelAlignInt REPLICATE_PADDING [Success] … -Running: ../benchmarks/ImageProcessing/Images/YuTu.png laplacianKernelAlign random3x3KernelAlignInt REPLICATE_PADDING +Running: ../benchmarks/ImageProcessing/Images/YuTu.png laplacianKernelAlign random3x3KernelAlignInt CONSTANT_PADDING [Success] … -Running: ../benchmarks/ImageProcessing/Images/YuTu.png logKernelAlign random3x3KernelAlignInt CONSTANT_PADDING +Running: ../benchmarks/ImageProcessing/Images/YuTu.png laplacianKernelAlign random3x3KernelAlignInt CONSTANT_PADDING [Success] … -Running: ../benchmarks/ImageProcessing/Images/YuTu.png logKernelAlign random3x3KernelAlignInt REPLICATE_PADDING +Running: ../benchmarks/ImageProcessing/Images/YuTu.png laplacianKernelAlign random3x3KernelAlignInt REPLICATE_PADDING [Success] … -Testing AVX512 support -CPU does not support AVX512. -Testing NEON support -CPU does not support NEON. 
+Running: ../benchmarks/ImageProcessing/Images/YuTu.png laplacianKernelAlign random3x3KernelAlignInt REPLICATE_PADDING From 55122adcfbcf90ecb1f1aa0334bf1d67a5b4059c Mon Sep 17 00:00:00 2001 From: Q Liu <52538137+LIUQyou@users.noreply.github.com> Date: Sun, 7 Sep 2025 16:27:19 +0200 Subject: [PATCH 48/52] update --- .github/workflows/bench.yml | 58 +++--- ...dom3x3KernelAlignInt_CONSTANT_PADDING.json | 156 +++++++-------- ...ndom3x3KernelAlignInt_CONSTANT_PADDING.log | 48 ++--- ...om3x3KernelAlignInt_REPLICATE_PADDING.json | 136 ++++++------- ...dom3x3KernelAlignInt_REPLICATE_PADDING.log | 48 ++--- ...dom3x3KernelAlignInt_CONSTANT_PADDING.json | 134 ++++++------- ...ndom3x3KernelAlignInt_CONSTANT_PADDING.log | 48 ++--- ...om3x3KernelAlignInt_REPLICATE_PADDING.json | 136 ++++++------- ...dom3x3KernelAlignInt_REPLICATE_PADDING.log | 48 ++--- ...dom3x3KernelAlignInt_CONSTANT_PADDING.json | 134 ++++++------- ...ndom3x3KernelAlignInt_CONSTANT_PADDING.log | 48 ++--- ...om3x3KernelAlignInt_REPLICATE_PADDING.json | 179 ++++++------------ ...dom3x3KernelAlignInt_REPLICATE_PADDING.log | 64 ++----- ...om3x3KernelAlignInt_REPLICATE_PADDING.json | 52 +++-- ...dom3x3KernelAlignInt_REPLICATE_PADDING.log | 29 ++- ...dom3x3KernelAlignInt_CONSTANT_PADDING.json | 132 ++++++------- ...ndom3x3KernelAlignInt_CONSTANT_PADDING.log | 46 ++--- ...om3x3KernelAlignInt_REPLICATE_PADDING.json | 134 ++++++------- ...dom3x3KernelAlignInt_REPLICATE_PADDING.log | 46 ++--- .../image-processing-result.log | 38 ++++ 20 files changed, 851 insertions(+), 863 deletions(-) diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml index f5b43284..141e9a4c 100755 --- a/.github/workflows/bench.yml +++ b/.github/workflows/bench.yml @@ -79,9 +79,7 @@ jobs: # -------- rebuild the 'latest' folder with a meta-refresh ------------ rm -rf latest mkdir -p latest - cat > latest/index.html < -EOF + printf '%s\n' "" > latest/index.html echo "[Info] benchmarks/latest now points to ../${latest}/" # 
------------------------------------------------------------ @@ -91,9 +89,7 @@ EOF working-directory: /home/quliu/buddy-complier-workspace/buddy-benchmark/site run: | set -e - cat > benchmarks/index.html <<'EOF' - -EOF + printf '%s\n' "" > benchmarks/index.html - name: Upload site artifact uses: actions/upload-pages-artifact@v3 @@ -105,23 +101,24 @@ EOF run_root="${{ env.BENCH_DIR }}" mkdir -p "$run_root" - cat > "$run_root/index.html" <<'EOF' - --- - layout: default - title: Benchmark run - nav_exclude: true - --- - -

          Benchmark results

          - -
            - {% for f in site.static_files %} - {% if f.path contains page.dir and f.name != "index.html" and f.extname == ".html" %} -
          • {{ f.name }}
          • - {% endif %} - {% endfor %} -
          -EOF + { + printf '%s\n' \ + '---' \ + 'layout: default' \ + 'title: Benchmark run' \ + 'nav_exclude: true' \ + '---' \ + '' \ + '

          Benchmark results

          ' \ + '' \ + '
            ' \ + '{% for f in site.static_files %}' \ + ' {% if f.path contains page.dir and f.name != "index.html" and f.extname == ".html" %}' \ + '
          • {{ f.name }}
          • ' \ + ' {% endif %}' \ + '{% endfor %}' \ + '
          ' + } > "$run_root/index.html" - name: Build top-level benchmarks index (list all runs) @@ -131,14 +128,13 @@ EOF out=benchmarks/index.html mkdir -p benchmarks { - cat <<'HTML' - --- - layout: default - title: Benchmarks - --- -

          Benchmark runs

          -

          Select a date and commit:

          -HTML + printf '%s\n' \ + '---' \ + 'layout: default' \ + 'title: Benchmarks' \ + '---' \ + '

          Benchmark runs

          ' \ + '

          Select a date and commit:

          ' # List dates newest first for d in $(ls -1d benchmarks/20*/ | sort -r); do diff --git a/test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json index 481e741d..97022f6f 100644 --- a/test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json +++ b/test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-09-07T13:08:44+00:00", + "date": "2025-09-07T14:25:01+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [3.36963,4.10254,5.83887], + "load_avg": [4.38672,3.83301,4.74805], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 141, - "real_time": 4.9913290703127569e+00, - "cpu_time": 4.9912880992907809e+00, + "iterations": 136, + "real_time": 5.0944548702853565e+00, + "cpu_time": 5.0941901617647067e+00, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 93, - "real_time": 7.5341148642442564e+00, - "cpu_time": 7.5340402903225794e+00, + "iterations": 92, + "real_time": 7.5879288108452503e+00, + "cpu_time": 7.5877727717391323e+00, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2162, - "real_time": 3.2283163781964480e-01, - "cpu_time": 3.2282717576318221e-01, + "iterations": 2126, + "real_time": 3.2648893102989485e-01, + "cpu_time": 3.2648295437441222e-01, "time_unit": "ms" }, { @@ -87,9 +87,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 841, - "real_time": 8.3219741474433973e-01, - "cpu_time": 8.3218935790725346e-01, + "iterations": 
810, + "real_time": 8.6173117160797119e-01, + "cpu_time": 8.6171702345678991e-01, "time_unit": "ms" }, { @@ -101,9 +101,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 536, - "real_time": 1.3002738662397684e+00, - "cpu_time": 1.3002208395522381e+00, + "iterations": 532, + "real_time": 1.3098518987347310e+00, + "cpu_time": 1.3098337011278192e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4661, - "real_time": 1.4891540657627406e-01, - "cpu_time": 1.4890615962239859e-01, + "iterations": 4693, + "real_time": 1.4864658349845142e-01, + "cpu_time": 1.4864443596846369e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2568, - "real_time": 2.7335639259664812e-01, - "cpu_time": 2.7334750467289703e-01, + "iterations": 2574, + "real_time": 2.7232925843942418e-01, + "cpu_time": 2.7232468376068386e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 101674, - "real_time": 6.8376584089808075e-03, - "cpu_time": 6.8374261954875460e-03, + "iterations": 101147, + "real_time": 6.9248195898291617e-03, + "cpu_time": 6.9247274758519840e-03, "time_unit": "ms" }, { @@ -157,9 +157,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 48511, - "real_time": 1.4459907311912134e-02, - "cpu_time": 1.4459377027890576e-02, + "iterations": 47876, + "real_time": 1.4619754012793923e-02, + "cpu_time": 1.4619547852786375e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2688, - "real_time": 2.5685550090635106e-01, - "cpu_time": 2.5684523921130975e-01, + "iterations": 1964, + "real_time": 3.4232623116426214e-01, + "cpu_time": 3.4231908757637480e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2734, - "real_time": 
2.6675885585467135e-01, - "cpu_time": 2.6675183504023386e-01, + "iterations": 1987, + "real_time": 3.5313764707971351e-01, + "cpu_time": 3.5311560694514321e-01, "time_unit": "ms" }, { @@ -199,11 +199,11 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1483, - "real_time": 4.7402130094478040e-01, - "cpu_time": 4.7401253742414040e-01, + "iterations": 1000, + "real_time": 7.3933277651667595e-01, + "cpu_time": 7.3929136599999978e-01, "time_unit": "ms" - }, + ,, { "name": "Buddy_Closing2D_Constant_Padding/1", "family_index": 12, @@ -213,11 +213,11 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1350, - "real_time": 3.9245121732906058e-01, - "cpu_time": 3.9244753037037028e-01, + "iterations": 1338, + "real_time": 6.8773021879752116e-01, + "cpu_time": 6.8761599701046450e-01, "time_unit": "ms" - }, + ,, { "name": "Buddy_TopHat2D_Constant_Padding/1", "family_index": 13, @@ -227,11 +227,11 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 666, - "real_time": 1.0719478924621690e+00, - "cpu_time": 1.0719183033033035e+00, + "iterations": 449, + "real_time": 1.3737666746290329e+00, + "cpu_time": 1.3737399821826268e+00, "time_unit": "ms" - }, + ,, { "name": "Buddy_BottomHat2D_Constant_Padding/1", "family_index": 14, @@ -241,11 +241,11 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 593, - "real_time": 1.1050101011539188e+00, - "cpu_time": 1.1049833305227643e+00, + "iterations": 451, + "real_time": 1.1508310349976145e+00, + "cpu_time": 1.1508074767184044e+00, "time_unit": "ms" - }, + ,, { "name": "OpenCV_Erode2D_Constant_Padding/1", "family_index": 15, @@ -255,11 +255,11 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4812, - "real_time": 1.4599609937354713e-01, - "cpu_time": 1.4599082543640898e-01, + "iterations": 4246, + "real_time": 1.6527650926088422e-01, + "cpu_time": 1.6527523033443248e-01, "time_unit": "ms" - }, + ,, { "name": 
"OpenCV_Opening2D_Constant_Padding/1", "family_index": 16, @@ -269,11 +269,11 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3117, - "real_time": 2.1899228256445571e-01, - "cpu_time": 2.1898928424767403e-01, + "iterations": 2717, + "real_time": 2.5677720045709945e-01, + "cpu_time": 2.5677142694147959e-01, "time_unit": "ms" - }, + ,, { "name": "OpenCV_Closing2D_Constant_Padding/1", "family_index": 17, @@ -283,11 +283,11 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3169, - "real_time": 2.2265837701497751e-01, - "cpu_time": 2.2265226285894635e-01, + "iterations": 2783, + "real_time": 2.5039162999470799e-01, + "cpu_time": 2.5038794178943619e-01, "time_unit": "ms" - }, + ,, { "name": "OpenCV_TopHat2D_Constant_Padding/1", "family_index": 18, @@ -297,11 +297,11 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2719, - "real_time": 2.5787956823580138e-01, - "cpu_time": 2.5787464913571129e-01, + "iterations": 2439, + "real_time": 2.8731209325614449e-01, + "cpu_time": 2.8730276875768884e-01, "time_unit": "ms" - }, + ,, { "name": "OpenCV_BottomHat2D_Constant_Padding/1", "family_index": 19, @@ -311,11 +311,11 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2731, - "real_time": 2.5586019458023457e-01, - "cpu_time": 2.5585493079458066e-01, + "iterations": 2429, + "real_time": 2.8720653292670473e-01, + "cpu_time": 2.8719969699464831e-01, "time_unit": "ms" - }, + ,, { "name": "OpenCV_MorphGrad2D_Constant_Padding/1", "family_index": 20, @@ -325,11 +325,11 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2598, - "real_time": 2.7497063137826044e-01, - "cpu_time": 2.7496818745188600e-01, + "iterations": 2460, + "real_time": 2.8502542555816773e-01, + "cpu_time": 2.8502001626016238e-01, "time_unit": "ms" - }, + ,, { "name": "OpenCV_Dilate2D_Constant_Padding/1", "family_index": 21, @@ -339,9 +339,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - 
"iterations": 4696, - "real_time": 1.4943660600221950e-01, - "cpu_time": 1.4943375085178881e-01, + "iterations": 4294, + "real_time": 1.6273279480452199e-01, + "cpu_time": 1.6272675011644155e-01, "time_unit": "ms" } ] diff --git a/test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log b/test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log index 439cfbbd..d7b44d35 100644 --- a/test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log +++ b/test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log @@ -1,4 +1,4 @@ -2025-09-07T13:08:44+00:00 +2025-09-07T14:25:01+00:00 Running ./bin/image-processing-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,33 +6,33 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 3.37, 4.10, 5.84 +Load Average: 4.39, 3.83, 4.75 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
-------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------------------- -Eigen_Convolve2D/1 4.99 ms 4.99 ms 141 -MLIR_Conv2D/1 7.53 ms 7.53 ms 93 -Buddy_Conv2D/1 0.323 ms 0.323 ms 2162 -Buddy_Corr2D_Constant_Padding/1 0.832 ms 0.832 ms 841 -OpenCV_Filter2D_Constant_Padding/1 1.30 ms 1.30 ms 536 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.149 ms 0.149 ms 4661 -Buddy_Resize2D_Bilinear_Interpolation/1 0.273 ms 0.273 ms 2568 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 101674 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 48511 -Buddy_Erosion2D_Constant_Padding/1 0.257 ms 0.257 ms 2688 -Buddy_Dilation2D_Constant_Padding/1 0.267 ms 0.267 ms 2734 -Buddy_Opening2D_Constant_Padding/1 0.474 ms 0.474 ms 1483 -Buddy_Closing2D_Constant_Padding/1 0.392 ms 0.392 ms 1350 -Buddy_TopHat2D_Constant_Padding/1 1.07 ms 1.07 ms 666 -Buddy_BottomHat2D_Constant_Padding/1 1.11 ms 1.10 ms 593 -OpenCV_Erode2D_Constant_Padding/1 0.146 ms 0.146 ms 4812 -OpenCV_Opening2D_Constant_Padding/1 0.219 ms 0.219 ms 3117 -OpenCV_Closing2D_Constant_Padding/1 0.223 ms 0.223 ms 3169 -OpenCV_TopHat2D_Constant_Padding/1 0.258 ms 0.258 ms 2719 -OpenCV_BottomHat2D_Constant_Padding/1 0.256 ms 0.256 ms 2731 -OpenCV_MorphGrad2D_Constant_Padding/1 0.275 ms 0.275 ms 2598 -OpenCV_Dilate2D_Constant_Padding/1 0.149 ms 0.149 ms 4696 +Eigen_Convolve2D/1 5.09 ms 5.09 ms 136 +MLIR_Conv2D/1 7.59 ms 7.59 ms 92 +Buddy_Conv2D/1 0.326 ms 0.326 ms 2126 +Buddy_Corr2D_Constant_Padding/1 0.862 ms 0.862 ms 810 +OpenCV_Filter2D_Constant_Padding/1 1.31 ms 1.31 ms 532 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.149 ms 0.149 ms 4693 +Buddy_Resize2D_Bilinear_Interpolation/1 0.272 ms 0.272 ms 2574 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 101147 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47876 
+Buddy_Erosion2D_Constant_Padding/1 0.342 ms 0.342 ms 1964 +Buddy_Dilation2D_Constant_Padding/1 0.353 ms 0.353 ms 1987 +Buddy_Opening2D_Constant_Padding/1 0.739 ms 0.739 ms 1000 +Buddy_Closing2D_Constant_Padding/1 0.688 ms 0.688 ms 1338 +Buddy_TopHat2D_Constant_Padding/1 1.37 ms 1.37 ms 449 +Buddy_BottomHat2D_Constant_Padding/1 1.15 ms 1.15 ms 451 +OpenCV_Erode2D_Constant_Padding/1 0.165 ms 0.165 ms 4246 +OpenCV_Opening2D_Constant_Padding/1 0.257 ms 0.257 ms 2717 +OpenCV_Closing2D_Constant_Padding/1 0.250 ms 0.250 ms 2783 +OpenCV_TopHat2D_Constant_Padding/1 0.287 ms 0.287 ms 2439 +OpenCV_BottomHat2D_Constant_Padding/1 0.287 ms 0.287 ms 2429 +OpenCV_MorphGrad2D_Constant_Padding/1 0.285 ms 0.285 ms 2460 +OpenCV_Dilate2D_Constant_Padding/1 0.163 ms 0.163 ms 4294 Saved PNG file. Saved PNG file. Saved PNG file. diff --git a/test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json index 2b2ab04c..16b026ca 100644 --- a/test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json +++ b/test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-09-07T13:09:08+00:00", + "date": "2025-09-07T14:25:25+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [3.19043,3.99707,5.75732], + "load_avg": [3.9126,3.76514,4.7002], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 135, - "real_time": 5.1618975345735194e+00, - "cpu_time": 5.1616230666666665e+00, + "iterations": 140, + "real_time": 4.9801771395972798e+00, + "cpu_time": 4.9799727642857139e+00, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, 
"repetition_index": 0, "threads": 1, - "iterations": 94, - "real_time": 7.6303360389268144e+00, - "cpu_time": 7.6299276276595762e+00, + "iterations": 92, + "real_time": 7.5637138570132461e+00, + "cpu_time": 7.5633655869565199e+00, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1898, - "real_time": 3.6347741055036370e-01, - "cpu_time": 3.6347207218124333e-01, + "iterations": 1928, + "real_time": 3.6541006771298862e-01, + "cpu_time": 3.6539922977178435e-01, "time_unit": "ms" }, { @@ -87,9 +87,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 821, - "real_time": 8.4532172522359006e-01, - "cpu_time": 8.4524135322777105e-01, + "iterations": 814, + "real_time": 8.6126693453484149e-01, + "cpu_time": 8.6124013022113044e-01, "time_unit": "ms" }, { @@ -101,9 +101,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 535, - "real_time": 1.3060625816617057e+00, - "cpu_time": 1.3060270074766349e+00, + "iterations": 533, + "real_time": 1.3084959091992583e+00, + "cpu_time": 1.3084598105065661e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4701, - "real_time": 1.4897938473025324e-01, - "cpu_time": 1.4897103637523929e-01, + "iterations": 4720, + "real_time": 1.4866878815247850e-01, + "cpu_time": 1.4866191355932201e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2567, - "real_time": 2.7154003791322401e-01, - "cpu_time": 2.7153190027269208e-01, + "iterations": 2570, + "real_time": 2.7357344062643757e-01, + "cpu_time": 2.7355898560311276e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 102141, - "real_time": 6.8959888496122178e-03, - "cpu_time": 6.8956638861965335e-03, + "iterations": 100923, + "real_time": 6.9336138939047895e-03, + "cpu_time": 
6.9332814323791450e-03, "time_unit": "ms" }, { @@ -157,9 +157,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 48431, - "real_time": 1.4446364637891266e-02, - "cpu_time": 1.4446001445355251e-02, + "iterations": 47371, + "real_time": 1.4785069694910813e-02, + "cpu_time": 1.4784267484325859e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2834, - "real_time": 2.4457907615704022e-01, - "cpu_time": 2.4455411467889898e-01, + "iterations": 1930, + "real_time": 3.5314525771944016e-01, + "cpu_time": 3.5312864974093228e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2817, - "real_time": 2.5071918747507177e-01, - "cpu_time": 2.5071165566205178e-01, + "iterations": 2161, + "real_time": 3.0299739835439926e-01, + "cpu_time": 3.0298353354928237e-01, "time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1763, - "real_time": 3.8233273665268042e-01, - "cpu_time": 3.8232422007941014e-01, + "iterations": 1325, + "real_time": 5.3083810322689562e-01, + "cpu_time": 5.3080599698113229e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1772, - "real_time": 3.9655546832232658e-01, - "cpu_time": 3.9653353950338632e-01, + "iterations": 1097, + "real_time": 5.6190424706572928e-01, + "cpu_time": 5.6186523518687359e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 651, - "real_time": 1.0213724948385710e+00, - "cpu_time": 1.0213337788018413e+00, + "iterations": 737, + "real_time": 9.5735288135393992e-01, + "cpu_time": 9.5733702578019098e-01, "time_unit": "ms" }, { @@ -241,9 +241,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 675, - "real_time": 1.0147930460947532e+00, - "cpu_time": 1.0147491318518496e+00, + 
"iterations": 710, + "real_time": 9.2390114047997429e-01, + "cpu_time": 9.2382159295774735e-01, "time_unit": "ms" }, { @@ -255,9 +255,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4248, - "real_time": 1.6329018223729061e-01, - "cpu_time": 1.6328074152542363e-01, + "iterations": 4222, + "real_time": 1.6582871410926897e-01, + "cpu_time": 1.6582006608242525e-01, "time_unit": "ms" }, { @@ -269,9 +269,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2850, - "real_time": 2.4692150323014511e-01, - "cpu_time": 2.4691329473684170e-01, + "iterations": 2852, + "real_time": 2.4506063847103762e-01, + "cpu_time": 2.4504969950911681e-01, "time_unit": "ms" }, { @@ -283,9 +283,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2844, - "real_time": 2.5029316460700646e-01, - "cpu_time": 2.5028627601969067e-01, + "iterations": 2864, + "real_time": 2.4439445982931712e-01, + "cpu_time": 2.4438653247206729e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2495, - "real_time": 2.8614350603912064e-01, - "cpu_time": 2.8613096152304607e-01, + "iterations": 2570, + "real_time": 2.7270443331638661e-01, + "cpu_time": 2.7268578988326853e-01, "time_unit": "ms" }, { @@ -311,9 +311,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2450, - "real_time": 2.8474553811306857e-01, - "cpu_time": 2.8473111061224476e-01, + "iterations": 2597, + "real_time": 2.7151760983384476e-01, + "cpu_time": 2.7150931959953850e-01, "time_unit": "ms" }, { @@ -325,9 +325,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2455, - "real_time": 2.8508260481954834e-01, - "cpu_time": 2.8507377230142528e-01, + "iterations": 2439, + "real_time": 2.8630925549401176e-01, + "cpu_time": 2.8630299056990677e-01, "time_unit": "ms" }, { @@ -339,9 +339,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4286, - 
"real_time": 1.6393232873345001e-01, - "cpu_time": 1.6392709612692480e-01, + "iterations": 4308, + "real_time": 1.6245689889902082e-01, + "cpu_time": 1.6245064832869063e-01, "time_unit": "ms" } ] diff --git a/test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log b/test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log index 4a4578d7..a4f8253f 100644 --- a/test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log +++ b/test_result/imageprocessing/AVX2_YuTu_prewittKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log @@ -1,4 +1,4 @@ -2025-09-07T13:09:08+00:00 +2025-09-07T14:25:25+00:00 Running ./bin/image-processing-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,33 +6,33 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 3.19, 4.00, 5.76 +Load Average: 3.91, 3.77, 4.70 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
-------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------------------- -Eigen_Convolve2D/1 5.16 ms 5.16 ms 135 -MLIR_Conv2D/1 7.63 ms 7.63 ms 94 -Buddy_Conv2D/1 0.363 ms 0.363 ms 1898 -Buddy_Corr2D_Constant_Padding/1 0.845 ms 0.845 ms 821 -OpenCV_Filter2D_Constant_Padding/1 1.31 ms 1.31 ms 535 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.149 ms 0.149 ms 4701 -Buddy_Resize2D_Bilinear_Interpolation/1 0.272 ms 0.272 ms 2567 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 102141 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 48431 -Buddy_Erosion2D_Constant_Padding/1 0.245 ms 0.245 ms 2834 -Buddy_Dilation2D_Constant_Padding/1 0.251 ms 0.251 ms 2817 -Buddy_Opening2D_Constant_Padding/1 0.382 ms 0.382 ms 1763 -Buddy_Closing2D_Constant_Padding/1 0.397 ms 0.397 ms 1772 -Buddy_TopHat2D_Constant_Padding/1 1.02 ms 1.02 ms 651 -Buddy_BottomHat2D_Constant_Padding/1 1.01 ms 1.01 ms 675 -OpenCV_Erode2D_Constant_Padding/1 0.163 ms 0.163 ms 4248 -OpenCV_Opening2D_Constant_Padding/1 0.247 ms 0.247 ms 2850 -OpenCV_Closing2D_Constant_Padding/1 0.250 ms 0.250 ms 2844 -OpenCV_TopHat2D_Constant_Padding/1 0.286 ms 0.286 ms 2495 -OpenCV_BottomHat2D_Constant_Padding/1 0.285 ms 0.285 ms 2450 -OpenCV_MorphGrad2D_Constant_Padding/1 0.285 ms 0.285 ms 2455 -OpenCV_Dilate2D_Constant_Padding/1 0.164 ms 0.164 ms 4286 +Eigen_Convolve2D/1 4.98 ms 4.98 ms 140 +MLIR_Conv2D/1 7.56 ms 7.56 ms 92 +Buddy_Conv2D/1 0.365 ms 0.365 ms 1928 +Buddy_Corr2D_Constant_Padding/1 0.861 ms 0.861 ms 814 +OpenCV_Filter2D_Constant_Padding/1 1.31 ms 1.31 ms 533 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.149 ms 0.149 ms 4720 +Buddy_Resize2D_Bilinear_Interpolation/1 0.274 ms 0.274 ms 2570 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 100923 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47371 
+Buddy_Erosion2D_Constant_Padding/1 0.353 ms 0.353 ms 1930 +Buddy_Dilation2D_Constant_Padding/1 0.303 ms 0.303 ms 2161 +Buddy_Opening2D_Constant_Padding/1 0.531 ms 0.531 ms 1325 +Buddy_Closing2D_Constant_Padding/1 0.562 ms 0.562 ms 1097 +Buddy_TopHat2D_Constant_Padding/1 0.957 ms 0.957 ms 737 +Buddy_BottomHat2D_Constant_Padding/1 0.924 ms 0.924 ms 710 +OpenCV_Erode2D_Constant_Padding/1 0.166 ms 0.166 ms 4222 +OpenCV_Opening2D_Constant_Padding/1 0.245 ms 0.245 ms 2852 +OpenCV_Closing2D_Constant_Padding/1 0.244 ms 0.244 ms 2864 +OpenCV_TopHat2D_Constant_Padding/1 0.273 ms 0.273 ms 2570 +OpenCV_BottomHat2D_Constant_Padding/1 0.272 ms 0.272 ms 2597 +OpenCV_MorphGrad2D_Constant_Padding/1 0.286 ms 0.286 ms 2439 +OpenCV_Dilate2D_Constant_Padding/1 0.162 ms 0.162 ms 4308 Saved PNG file. Saved PNG file. Saved PNG file. diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json index 0a1d1b28..583475bb 100644 --- a/test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json +++ b/test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-09-07T13:09:33+00:00", + "date": "2025-09-07T14:25:50+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [3.04053,3.89941,5.67725], + "load_avg": [3.6001,3.70215,4.65381], "library_build_type": "release" }, "benchmarks": [ @@ -46,8 +46,8 @@ "repetition_index": 0, "threads": 1, "iterations": 137, - "real_time": 5.1388616000648835e+00, - "cpu_time": 5.1386628175182478e+00, + "real_time": 5.0967205723706819e+00, + "cpu_time": 5.0964819489051107e+00, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - 
"iterations": 92, - "real_time": 7.5730732844575588e+00, - "cpu_time": 7.5728530760869583e+00, + "iterations": 91, + "real_time": 7.6128631257093868e+00, + "cpu_time": 7.6126163516483496e+00, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2206, - "real_time": 3.2243556288135994e-01, - "cpu_time": 3.2241800589301906e-01, + "iterations": 1997, + "real_time": 3.4362500795556356e-01, + "cpu_time": 3.4358477866800197e-01, "time_unit": "ms" }, { @@ -87,9 +87,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 826, - "real_time": 8.3711696450415884e-01, - "cpu_time": 8.3709625423728795e-01, + "iterations": 805, + "real_time": 8.6478873717118498e-01, + "cpu_time": 8.6472154285714264e-01, "time_unit": "ms" }, { @@ -101,9 +101,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 531, - "real_time": 1.3111124899091020e+00, - "cpu_time": 1.3110567871939727e+00, + "iterations": 529, + "real_time": 1.3110746324963740e+00, + "cpu_time": 1.3110105841209827e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4648, - "real_time": 1.4950641512665608e-01, - "cpu_time": 1.4950070998278833e-01, + "iterations": 4716, + "real_time": 1.5283490043762279e-01, + "cpu_time": 1.5283251993214594e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2560, - "real_time": 2.7398462261771783e-01, - "cpu_time": 2.7396728242187507e-01, + "iterations": 2554, + "real_time": 2.7269076267162523e-01, + "cpu_time": 2.7268682106499587e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 102196, - "real_time": 6.9250278208413379e-03, - "cpu_time": 6.9248903968844146e-03, + "iterations": 101059, + "real_time": 6.9261688433581575e-03, + "cpu_time": 6.9260731255998999e-03, "time_unit": "ms" }, { @@ -157,9 
+157,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 45300, - "real_time": 1.4499240281434511e-02, - "cpu_time": 1.4498946423841065e-02, + "iterations": 47776, + "real_time": 1.4638210093503979e-02, + "cpu_time": 1.4638031208137982e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2831, - "real_time": 2.4162875557484789e-01, - "cpu_time": 2.4161129918756594e-01, + "iterations": 1929, + "real_time": 2.9913433178092852e-01, + "cpu_time": 2.9912855054432330e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2893, - "real_time": 2.4165965119654276e-01, - "cpu_time": 2.4165194745938479e-01, + "iterations": 2459, + "real_time": 3.0520153711025383e-01, + "cpu_time": 3.0519616063440402e-01, "time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1793, - "real_time": 3.9718762389189427e-01, - "cpu_time": 3.9718364082543139e-01, + "iterations": 1338, + "real_time": 5.4847415096734853e-01, + "cpu_time": 5.4846282810164348e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1854, - "real_time": 3.9794566280569205e-01, - "cpu_time": 3.9791046979503841e-01, + "iterations": 1000, + "real_time": 5.5285461992025375e-01, + "cpu_time": 5.5284405199999931e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 667, - "real_time": 1.0182474372626424e+00, - "cpu_time": 1.0181817661169434e+00, + "iterations": 716, + "real_time": 9.7495791445064806e-01, + "cpu_time": 9.7494059916201126e-01, "time_unit": "ms" }, { @@ -241,9 +241,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 648, - "real_time": 1.0037174172423504e+00, - "cpu_time": 1.0036660324074091e+00, + "iterations": 692, + "real_time": 9.6457034909311745e-01, 
+ "cpu_time": 9.6455439884393179e-01, "time_unit": "ms" }, { @@ -255,9 +255,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4893, - "real_time": 1.4206621359638610e-01, - "cpu_time": 1.4206494400163472e-01, + "iterations": 4887, + "real_time": 1.4370574812374892e-01, + "cpu_time": 1.4370325148352775e-01, "time_unit": "ms" }, { @@ -269,9 +269,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3067, - "real_time": 2.2388874809128631e-01, - "cpu_time": 2.2388449331594412e-01, + "iterations": 3079, + "real_time": 2.2928564821059302e-01, + "cpu_time": 2.2928312731406281e-01, "time_unit": "ms" }, { @@ -283,9 +283,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3150, - "real_time": 2.2265229433301895e-01, - "cpu_time": 2.2264321619047678e-01, + "iterations": 3082, + "real_time": 2.2956116836926599e-01, + "cpu_time": 2.2955781213497742e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2705, - "real_time": 2.5939554141321375e-01, - "cpu_time": 2.5939025988909481e-01, + "iterations": 2636, + "real_time": 2.6566850951390852e-01, + "cpu_time": 2.6566524696509775e-01, "time_unit": "ms" }, { @@ -311,9 +311,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2695, - "real_time": 2.5867627785024483e-01, - "cpu_time": 2.5867045009276496e-01, + "iterations": 2623, + "real_time": 2.6680013712380524e-01, + "cpu_time": 2.6679694357605821e-01, "time_unit": "ms" }, { @@ -325,9 +325,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2673, - "real_time": 2.6060673423889108e-01, - "cpu_time": 2.6059759932659970e-01, + "iterations": 2663, + "real_time": 2.6179852199831988e-01, + "cpu_time": 2.6179651633496065e-01, "time_unit": "ms" }, { @@ -339,9 +339,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4862, - "real_time": 1.4264437518263001e-01, - "cpu_time": 
1.4263860037021783e-01, + "iterations": 4965, + "real_time": 1.4100358385333842e-01, + "cpu_time": 1.4100249889224539e-01, "time_unit": "ms" } ] diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log b/test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log index f52a2204..96c6d225 100644 --- a/test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log +++ b/test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log @@ -1,4 +1,4 @@ -2025-09-07T13:09:33+00:00 +2025-09-07T14:25:50+00:00 Running ./bin/image-processing-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,33 +6,33 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 3.04, 3.90, 5.68 +Load Average: 3.60, 3.70, 4.65 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
-------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------------------- -Eigen_Convolve2D/1 5.14 ms 5.14 ms 137 -MLIR_Conv2D/1 7.57 ms 7.57 ms 92 -Buddy_Conv2D/1 0.322 ms 0.322 ms 2206 -Buddy_Corr2D_Constant_Padding/1 0.837 ms 0.837 ms 826 -OpenCV_Filter2D_Constant_Padding/1 1.31 ms 1.31 ms 531 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.150 ms 0.150 ms 4648 -Buddy_Resize2D_Bilinear_Interpolation/1 0.274 ms 0.274 ms 2560 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 102196 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 45300 -Buddy_Erosion2D_Constant_Padding/1 0.242 ms 0.242 ms 2831 -Buddy_Dilation2D_Constant_Padding/1 0.242 ms 0.242 ms 2893 -Buddy_Opening2D_Constant_Padding/1 0.397 ms 0.397 ms 1793 -Buddy_Closing2D_Constant_Padding/1 0.398 ms 0.398 ms 1854 -Buddy_TopHat2D_Constant_Padding/1 1.02 ms 1.02 ms 667 -Buddy_BottomHat2D_Constant_Padding/1 1.00 ms 1.00 ms 648 -OpenCV_Erode2D_Constant_Padding/1 0.142 ms 0.142 ms 4893 -OpenCV_Opening2D_Constant_Padding/1 0.224 ms 0.224 ms 3067 -OpenCV_Closing2D_Constant_Padding/1 0.223 ms 0.223 ms 3150 -OpenCV_TopHat2D_Constant_Padding/1 0.259 ms 0.259 ms 2705 -OpenCV_BottomHat2D_Constant_Padding/1 0.259 ms 0.259 ms 2695 -OpenCV_MorphGrad2D_Constant_Padding/1 0.261 ms 0.261 ms 2673 -OpenCV_Dilate2D_Constant_Padding/1 0.143 ms 0.143 ms 4862 +Eigen_Convolve2D/1 5.10 ms 5.10 ms 137 +MLIR_Conv2D/1 7.61 ms 7.61 ms 91 +Buddy_Conv2D/1 0.344 ms 0.344 ms 1997 +Buddy_Corr2D_Constant_Padding/1 0.865 ms 0.865 ms 805 +OpenCV_Filter2D_Constant_Padding/1 1.31 ms 1.31 ms 529 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.153 ms 0.153 ms 4716 +Buddy_Resize2D_Bilinear_Interpolation/1 0.273 ms 0.273 ms 2554 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 101059 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47776 
+Buddy_Erosion2D_Constant_Padding/1 0.299 ms 0.299 ms 1929 +Buddy_Dilation2D_Constant_Padding/1 0.305 ms 0.305 ms 2459 +Buddy_Opening2D_Constant_Padding/1 0.548 ms 0.548 ms 1338 +Buddy_Closing2D_Constant_Padding/1 0.553 ms 0.553 ms 1000 +Buddy_TopHat2D_Constant_Padding/1 0.975 ms 0.975 ms 716 +Buddy_BottomHat2D_Constant_Padding/1 0.965 ms 0.965 ms 692 +OpenCV_Erode2D_Constant_Padding/1 0.144 ms 0.144 ms 4887 +OpenCV_Opening2D_Constant_Padding/1 0.229 ms 0.229 ms 3079 +OpenCV_Closing2D_Constant_Padding/1 0.230 ms 0.230 ms 3082 +OpenCV_TopHat2D_Constant_Padding/1 0.266 ms 0.266 ms 2636 +OpenCV_BottomHat2D_Constant_Padding/1 0.267 ms 0.267 ms 2623 +OpenCV_MorphGrad2D_Constant_Padding/1 0.262 ms 0.262 ms 2663 +OpenCV_Dilate2D_Constant_Padding/1 0.141 ms 0.141 ms 4965 Saved PNG file. Saved PNG file. Saved PNG file. diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json index 64dd32da..b1ffde62 100644 --- a/test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json +++ b/test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-09-07T13:09:57+00:00", + "date": "2025-09-07T14:26:14+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [2.94531,3.80908,5.59961], + "load_avg": [3.39404,3.64453,4.60889], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 134, - "real_time": 5.1393594815215069e+00, - "cpu_time": 5.1390401492537316e+00, + "iterations": 121, + "real_time": 5.1075547071527847e+00, + "cpu_time": 5.1073665289256196e+00, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ 
"repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 91, - "real_time": 7.5410613483125033e+00, - "cpu_time": 7.5408332637362649e+00, + "iterations": 93, + "real_time": 7.6060896358823262e+00, + "cpu_time": 7.6059665161290306e+00, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2173, - "real_time": 3.2154843896915436e-01, - "cpu_time": 3.2153552692130705e-01, + "iterations": 2123, + "real_time": 3.2846411506746714e-01, + "cpu_time": 3.2843848233631667e-01, "time_unit": "ms" }, { @@ -87,9 +87,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 829, - "real_time": 8.4328651877594230e-01, - "cpu_time": 8.4327024969843234e-01, + "iterations": 817, + "real_time": 8.6353970019003168e-01, + "cpu_time": 8.6350830844553239e-01, "time_unit": "ms" }, { @@ -101,9 +101,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 532, - "real_time": 1.3073873881222611e+00, - "cpu_time": 1.3073464718045120e+00, + "iterations": 533, + "real_time": 1.3113399062438484e+00, + "cpu_time": 1.3112924840525322e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4585, - "real_time": 1.5132041605374286e-01, - "cpu_time": 1.5102619302071968e-01, + "iterations": 4732, + "real_time": 1.4925754329131402e-01, + "cpu_time": 1.4925238377007605e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2573, - "real_time": 2.7366480878758792e-01, - "cpu_time": 2.7364495919160520e-01, + "iterations": 2557, + "real_time": 2.7432158665121836e-01, + "cpu_time": 2.7429795150567055e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 101803, - "real_time": 6.8592732155106284e-03, - "cpu_time": 6.8590132805516511e-03, + "iterations": 101042, + "real_time": 6.9291528471928992e-03, + "cpu_time": 
6.9288272599513055e-03, "time_unit": "ms" }, { @@ -157,9 +157,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 48122, - "real_time": 1.4427633615687751e-02, - "cpu_time": 1.4427098915257043e-02, + "iterations": 47868, + "real_time": 1.4627723422845006e-02, + "cpu_time": 1.4627092149243754e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2888, - "real_time": 2.4509608972600952e-01, - "cpu_time": 2.4508256613573420e-01, + "iterations": 2430, + "real_time": 2.8533444973666972e-01, + "cpu_time": 2.8531405061728438e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2928, - "real_time": 2.4123604490823758e-01, - "cpu_time": 2.4122515266393438e-01, + "iterations": 2094, + "real_time": 3.1473711951357358e-01, + "cpu_time": 3.1470967144221584e-01, "time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1789, - "real_time": 3.8186785993528072e-01, - "cpu_time": 3.8185716601453323e-01, + "iterations": 1014, + "real_time": 5.7866645120892535e-01, + "cpu_time": 5.7864507199211168e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1769, - "real_time": 3.9249769353543112e-01, - "cpu_time": 3.9247566817410967e-01, + "iterations": 1111, + "real_time": 5.6143095641389396e-01, + "cpu_time": 5.6142596129612876e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 664, - "real_time": 1.0108141711049050e+00, - "cpu_time": 1.0107552921686753e+00, + "iterations": 716, + "real_time": 9.7362239787864946e-01, + "cpu_time": 9.7356560335195652e-01, "time_unit": "ms" }, { @@ -241,9 +241,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 673, - "real_time": 9.8877163246871813e-01, - "cpu_time": 9.8869461069836506e-01, + 
"iterations": 685, + "real_time": 9.8786628159293288e-01, + "cpu_time": 9.8785897518248145e-01, "time_unit": "ms" }, { @@ -255,9 +255,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4863, - "real_time": 1.4223833171006905e-01, - "cpu_time": 1.4223450503804233e-01, + "iterations": 4300, + "real_time": 1.4379027624462926e-01, + "cpu_time": 1.4378212465116308e-01, "time_unit": "ms" }, { @@ -269,9 +269,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3100, - "real_time": 2.4483322376205074e-01, - "cpu_time": 2.4482809935483879e-01, + "iterations": 3063, + "real_time": 2.2853155405428766e-01, + "cpu_time": 2.2852091609533151e-01, "time_unit": "ms" }, { @@ -283,9 +283,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3060, - "real_time": 2.2852179880430495e-01, - "cpu_time": 2.2851666633986922e-01, + "iterations": 3071, + "real_time": 2.2941528513508408e-01, + "cpu_time": 2.2940828427222390e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2675, - "real_time": 2.5958791375160217e-01, - "cpu_time": 2.5957632560747695e-01, + "iterations": 2523, + "real_time": 2.6365732489906035e-01, + "cpu_time": 2.6365356282203672e-01, "time_unit": "ms" }, { @@ -311,9 +311,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2709, - "real_time": 2.6177893945897096e-01, - "cpu_time": 2.6177052639350257e-01, + "iterations": 2656, + "real_time": 2.6352780433483869e-01, + "cpu_time": 2.6352464533132480e-01, "time_unit": "ms" }, { @@ -325,9 +325,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2658, - "real_time": 2.6277054109637948e-01, - "cpu_time": 2.6275918924003028e-01, + "iterations": 2657, + "real_time": 2.6354999174558447e-01, + "cpu_time": 2.6354654459917209e-01, "time_unit": "ms" }, { @@ -339,9 +339,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4890, - 
"real_time": 1.4260715456462345e-01, - "cpu_time": 1.4260182985685071e-01, + "iterations": 4948, + "real_time": 1.4165986747922873e-01, + "cpu_time": 1.4165828880355658e-01, "time_unit": "ms" } ] diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log b/test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log index e75240ac..6a502cdc 100644 --- a/test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log +++ b/test_result/imageprocessing/AVX2_YuTu_sobel3x3KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log @@ -1,4 +1,4 @@ -2025-09-07T13:09:57+00:00 +2025-09-07T14:26:14+00:00 Running ./bin/image-processing-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,33 +6,33 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 2.95, 3.81, 5.60 +Load Average: 3.39, 3.64, 4.61 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
-------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------------------- -Eigen_Convolve2D/1 5.14 ms 5.14 ms 134 -MLIR_Conv2D/1 7.54 ms 7.54 ms 91 -Buddy_Conv2D/1 0.322 ms 0.322 ms 2173 -Buddy_Corr2D_Constant_Padding/1 0.843 ms 0.843 ms 829 -OpenCV_Filter2D_Constant_Padding/1 1.31 ms 1.31 ms 532 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.151 ms 0.151 ms 4585 -Buddy_Resize2D_Bilinear_Interpolation/1 0.274 ms 0.274 ms 2573 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 101803 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 48122 -Buddy_Erosion2D_Constant_Padding/1 0.245 ms 0.245 ms 2888 -Buddy_Dilation2D_Constant_Padding/1 0.241 ms 0.241 ms 2928 -Buddy_Opening2D_Constant_Padding/1 0.382 ms 0.382 ms 1789 -Buddy_Closing2D_Constant_Padding/1 0.392 ms 0.392 ms 1769 -Buddy_TopHat2D_Constant_Padding/1 1.01 ms 1.01 ms 664 -Buddy_BottomHat2D_Constant_Padding/1 0.989 ms 0.989 ms 673 -OpenCV_Erode2D_Constant_Padding/1 0.142 ms 0.142 ms 4863 -OpenCV_Opening2D_Constant_Padding/1 0.245 ms 0.245 ms 3100 -OpenCV_Closing2D_Constant_Padding/1 0.229 ms 0.229 ms 3060 -OpenCV_TopHat2D_Constant_Padding/1 0.260 ms 0.260 ms 2675 -OpenCV_BottomHat2D_Constant_Padding/1 0.262 ms 0.262 ms 2709 -OpenCV_MorphGrad2D_Constant_Padding/1 0.263 ms 0.263 ms 2658 -OpenCV_Dilate2D_Constant_Padding/1 0.143 ms 0.143 ms 4890 +Eigen_Convolve2D/1 5.11 ms 5.11 ms 121 +MLIR_Conv2D/1 7.61 ms 7.61 ms 93 +Buddy_Conv2D/1 0.328 ms 0.328 ms 2123 +Buddy_Corr2D_Constant_Padding/1 0.864 ms 0.864 ms 817 +OpenCV_Filter2D_Constant_Padding/1 1.31 ms 1.31 ms 533 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.149 ms 0.149 ms 4732 +Buddy_Resize2D_Bilinear_Interpolation/1 0.274 ms 0.274 ms 2557 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 101042 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47868 
+Buddy_Erosion2D_Constant_Padding/1 0.285 ms 0.285 ms 2430 +Buddy_Dilation2D_Constant_Padding/1 0.315 ms 0.315 ms 2094 +Buddy_Opening2D_Constant_Padding/1 0.579 ms 0.579 ms 1014 +Buddy_Closing2D_Constant_Padding/1 0.561 ms 0.561 ms 1111 +Buddy_TopHat2D_Constant_Padding/1 0.974 ms 0.974 ms 716 +Buddy_BottomHat2D_Constant_Padding/1 0.988 ms 0.988 ms 685 +OpenCV_Erode2D_Constant_Padding/1 0.144 ms 0.144 ms 4300 +OpenCV_Opening2D_Constant_Padding/1 0.229 ms 0.229 ms 3063 +OpenCV_Closing2D_Constant_Padding/1 0.229 ms 0.229 ms 3071 +OpenCV_TopHat2D_Constant_Padding/1 0.264 ms 0.264 ms 2523 +OpenCV_BottomHat2D_Constant_Padding/1 0.264 ms 0.264 ms 2656 +OpenCV_MorphGrad2D_Constant_Padding/1 0.264 ms 0.264 ms 2657 +OpenCV_Dilate2D_Constant_Padding/1 0.142 ms 0.142 ms 4948 Saved PNG file. Saved PNG file. Saved PNG file. diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json index 38474c73..7cbd4e18 100644 --- a/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json +++ b/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-09-07T13:10:21+00:00", + "date": "2025-09-07T14:26:37+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [2.95947,3.74316,5.5293], + "load_avg": [3.28125,3.60205,4.57373], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 58, - "real_time": 1.2066526531145490e+01, - "cpu_time": 1.2065775413793103e+01, + "iterations": 56, + "real_time": 1.2067083735018969e+01, + "cpu_time": 1.2066812107142857e+01, "time_unit": "ms" }, { @@ -60,8 +60,8 @@ 
"repetition_index": 0, "threads": 1, "iterations": 23, - "real_time": 3.0403987545034159e+01, - "cpu_time": 3.0402858130434790e+01, + "real_time": 3.0612203414025515e+01, + "cpu_time": 3.0611188304347817e+01, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 532, - "real_time": 1.3182804870762324e+00, - "cpu_time": 1.3182079266917295e+00, + "iterations": 568, + "real_time": 1.3034208284192521e+00, + "cpu_time": 1.3033825299295774e+00, "time_unit": "ms" }, { @@ -87,9 +87,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 287, - "real_time": 2.4341225805805951e+00, - "cpu_time": 2.4340057770034864e+00, + "iterations": 282, + "real_time": 2.4699879717742297e+00, + "cpu_time": 2.4699094858156023e+00, "time_unit": "ms" }, { @@ -101,9 +101,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 163, - "real_time": 4.3055439379317626e+00, - "cpu_time": 4.3053028404907954e+00, + "iterations": 162, + "real_time": 4.3287115562477227e+00, + "cpu_time": 4.3285012407407431e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4634, - "real_time": 1.5098747890161243e-01, - "cpu_time": 1.5098142684505816e-01, + "iterations": 4700, + "real_time": 1.4935964916614777e-01, + "cpu_time": 1.4935194957446807e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2565, - "real_time": 2.7393707115980154e-01, - "cpu_time": 2.7392365964912280e-01, + "iterations": 2552, + "real_time": 2.7521437495292916e-01, + "cpu_time": 2.7520046473354215e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 101737, - "real_time": 6.8683102937117163e-03, - "cpu_time": 6.8680850231479179e-03, + "iterations": 100337, + "real_time": 6.9741815465525284e-03, + "cpu_time": 6.9738547993262777e-03, "time_unit": "ms" }, 
{ @@ -157,9 +157,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 47105, - "real_time": 1.4881931008863394e-02, - "cpu_time": 1.4881426748752757e-02, + "iterations": 46773, + "real_time": 1.4982721659303913e-02, + "cpu_time": 1.4982278921600070e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3021, - "real_time": 2.3487997751526388e-01, - "cpu_time": 2.3487021383647805e-01, + "iterations": 2050, + "real_time": 3.1215877976359391e-01, + "cpu_time": 3.1211740390243931e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2916, - "real_time": 2.3379066731622353e-01, - "cpu_time": 2.3377707304526726e-01, + "iterations": 2432, + "real_time": 2.9099147190879049e-01, + "cpu_time": 2.9097846422697360e-01, "time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1884, - "real_time": 3.7098769691719369e-01, - "cpu_time": 3.7097218789808861e-01, + "iterations": 1079, + "real_time": 5.8892201131643029e-01, + "cpu_time": 5.8888681742354165e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1844, - "real_time": 3.9446013504861493e-01, - "cpu_time": 3.9443875271149736e-01, + "iterations": 1200, + "real_time": 5.7249554432928562e-01, + "cpu_time": 5.7247323416666751e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 686, - "real_time": 1.0319062326722521e+00, - "cpu_time": 1.0318697463556861e+00, + "iterations": 693, + "real_time": 9.6331589450739852e-01, + "cpu_time": 9.6324691630591597e-01, "time_unit": "ms" }, { @@ -241,9 +241,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 644, - "real_time": 1.0185169970026668e+00, - "cpu_time": 1.0184906350931668e+00, + "iterations": 720, + "real_time": 
9.4459417483044994e-01, + "cpu_time": 9.4454029166666720e-01, "time_unit": "ms" }, { @@ -255,9 +255,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4867, - "real_time": 1.4380763933745305e-01, - "cpu_time": 1.4380276741319087e-01, + "iterations": 4811, + "real_time": 1.4385227303672446e-01, + "cpu_time": 1.4383307960922870e-01, "time_unit": "ms" }, { @@ -269,9 +269,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3143, - "real_time": 2.2292424454417459e-01, - "cpu_time": 2.2291499936366541e-01, + "iterations": 3023, + "real_time": 2.3205718441471690e-01, + "cpu_time": 2.3204768309626247e-01, "time_unit": "ms" }, { @@ -283,9 +283,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3113, - "real_time": 2.2401934842032478e-01, - "cpu_time": 2.2401028268551296e-01, + "iterations": 3090, + "real_time": 2.2930755222692459e-01, + "cpu_time": 2.2929591650485445e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2714, - "real_time": 2.5358747803470888e-01, - "cpu_time": 2.5357193478260892e-01, + "iterations": 2569, + "real_time": 2.6998296670061739e-01, + "cpu_time": 2.6997637757882353e-01, "time_unit": "ms" }, { @@ -311,9 +311,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2759, - "real_time": 2.5393757948887524e-01, - "cpu_time": 2.5392501087350461e-01, + "iterations": 2579, + "real_time": 2.6765619283639541e-01, + "cpu_time": 2.6764102830554409e-01, "time_unit": "ms" }, { @@ -325,9 +325,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2635, - "real_time": 2.6606651733450914e-01, - "cpu_time": 2.6604915483870911e-01, + "iterations": 2657, + "real_time": 2.6501084954432630e-01, + "cpu_time": 2.6500044373353410e-01, "time_unit": "ms" }, { @@ -339,9 +339,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4941, - "real_time": 1.4199837635123663e-01, - 
"cpu_time": 1.4199401133373810e-01, + "iterations": 4921, + "real_time": 1.4214790953269119e-01, + "cpu_time": 1.4214541942694606e-01, "time_unit": "ms" } ] diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log b/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log index f82ee0db..381be4b4 100644 --- a/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log +++ b/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log @@ -1,4 +1,4 @@ -2025-09-07T13:10:21+00:00 +2025-09-07T14:26:37+00:00 Running ./bin/image-processing-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,33 +6,33 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 2.96, 3.74, 5.53 +Load Average: 3.28, 3.60, 4.57 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
-------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------------------- -Eigen_Convolve2D/1 12.1 ms 12.1 ms 58 -MLIR_Conv2D/1 30.4 ms 30.4 ms 23 -Buddy_Conv2D/1 1.32 ms 1.32 ms 532 -Buddy_Corr2D_Constant_Padding/1 2.43 ms 2.43 ms 287 -OpenCV_Filter2D_Constant_Padding/1 4.31 ms 4.31 ms 163 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.151 ms 0.151 ms 4634 -Buddy_Resize2D_Bilinear_Interpolation/1 0.274 ms 0.274 ms 2565 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 101737 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47105 -Buddy_Erosion2D_Constant_Padding/1 0.235 ms 0.235 ms 3021 -Buddy_Dilation2D_Constant_Padding/1 0.234 ms 0.234 ms 2916 -Buddy_Opening2D_Constant_Padding/1 0.371 ms 0.371 ms 1884 -Buddy_Closing2D_Constant_Padding/1 0.394 ms 0.394 ms 1844 -Buddy_TopHat2D_Constant_Padding/1 1.03 ms 1.03 ms 686 -Buddy_BottomHat2D_Constant_Padding/1 1.02 ms 1.02 ms 644 -OpenCV_Erode2D_Constant_Padding/1 0.144 ms 0.144 ms 4867 -OpenCV_Opening2D_Constant_Padding/1 0.223 ms 0.223 ms 3143 -OpenCV_Closing2D_Constant_Padding/1 0.224 ms 0.224 ms 3113 -OpenCV_TopHat2D_Constant_Padding/1 0.254 ms 0.254 ms 2714 -OpenCV_BottomHat2D_Constant_Padding/1 0.254 ms 0.254 ms 2759 -OpenCV_MorphGrad2D_Constant_Padding/1 0.266 ms 0.266 ms 2635 -OpenCV_Dilate2D_Constant_Padding/1 0.142 ms 0.142 ms 4941 +Eigen_Convolve2D/1 12.1 ms 12.1 ms 56 +MLIR_Conv2D/1 30.6 ms 30.6 ms 23 +Buddy_Conv2D/1 1.30 ms 1.30 ms 568 +Buddy_Corr2D_Constant_Padding/1 2.47 ms 2.47 ms 282 +OpenCV_Filter2D_Constant_Padding/1 4.33 ms 4.33 ms 162 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.149 ms 0.149 ms 4700 +Buddy_Resize2D_Bilinear_Interpolation/1 0.275 ms 0.275 ms 2552 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 100337 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 46773 
+Buddy_Erosion2D_Constant_Padding/1 0.312 ms 0.312 ms 2050 +Buddy_Dilation2D_Constant_Padding/1 0.291 ms 0.291 ms 2432 +Buddy_Opening2D_Constant_Padding/1 0.589 ms 0.589 ms 1079 +Buddy_Closing2D_Constant_Padding/1 0.572 ms 0.572 ms 1200 +Buddy_TopHat2D_Constant_Padding/1 0.963 ms 0.963 ms 693 +Buddy_BottomHat2D_Constant_Padding/1 0.945 ms 0.945 ms 720 +OpenCV_Erode2D_Constant_Padding/1 0.144 ms 0.144 ms 4811 +OpenCV_Opening2D_Constant_Padding/1 0.232 ms 0.232 ms 3023 +OpenCV_Closing2D_Constant_Padding/1 0.229 ms 0.229 ms 3090 +OpenCV_TopHat2D_Constant_Padding/1 0.270 ms 0.270 ms 2569 +OpenCV_BottomHat2D_Constant_Padding/1 0.268 ms 0.268 ms 2579 +OpenCV_MorphGrad2D_Constant_Padding/1 0.265 ms 0.265 ms 2657 +OpenCV_Dilate2D_Constant_Padding/1 0.142 ms 0.142 ms 4921 Saved PNG file. Saved PNG file. Saved PNG file. diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json index 4515ca1c..e0d17d98 100644 --- a/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json +++ b/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-09-07T13:10:45+00:00", + "date": "2025-09-07T14:27:01+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [2.63086,3.60156,5.43457], + "load_avg": [3.31982,3.58496,4.54102], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 58, - "real_time": 1.2157393234043285e+01, - "cpu_time": 1.2156209275862070e+01, + "iterations": 59, + "real_time": 1.1948739699387955e+01, + "cpu_time": 1.1948392644067797e+01, "time_unit": "ms" }, { @@ -60,8 +60,8 @@ 
"repetition_index": 0, "threads": 1, "iterations": 23, - "real_time": 3.0382284651631895e+01, - "cpu_time": 3.0381463956521745e+01, + "real_time": 3.0476028666548107e+01, + "cpu_time": 3.0474844999999998e+01, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 566, - "real_time": 1.2311481281641095e+00, - "cpu_time": 1.2311096890459359e+00, + "iterations": 562, + "real_time": 1.2884445793369912e+00, + "cpu_time": 1.2884208736654807e+00, "time_unit": "ms" }, { @@ -87,9 +87,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 287, - "real_time": 2.4388498052279708e+00, - "cpu_time": 2.4387647142857145e+00, + "iterations": 284, + "real_time": 2.4603739590711995e+00, + "cpu_time": 2.4602806619718312e+00, "time_unit": "ms" }, { @@ -102,8 +102,8 @@ "repetition_index": 0, "threads": 1, "iterations": 163, - "real_time": 4.2787142600749899e+00, - "cpu_time": 4.2785227607361991e+00, + "real_time": 4.2872666581276739e+00, + "cpu_time": 4.2871032392638035e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4617, - "real_time": 1.4999322794928086e-01, - "cpu_time": 1.4998539831059118e-01, + "iterations": 4715, + "real_time": 1.4838221327876741e-01, + "cpu_time": 1.4837547020148453e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2561, - "real_time": 2.7250940011423669e-01, - "cpu_time": 2.7250318976962118e-01, + "iterations": 2576, + "real_time": 2.7262466462178631e-01, + "cpu_time": 2.7260505046583849e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 101810, - "real_time": 6.8337629754936027e-03, - "cpu_time": 6.8336145270601984e-03, + "iterations": 101005, + "real_time": 6.9283273903298307e-03, + "cpu_time": 6.9277283500816817e-03, "time_unit": "ms" }, { @@ -157,9 +157,9 @@ "repetitions": 1, 
"repetition_index": 0, "threads": 1, - "iterations": 48577, - "real_time": 1.4544123902229474e-02, - "cpu_time": 1.4543825637647435e-02, + "iterations": 47760, + "real_time": 1.4637780439738853e-02, + "cpu_time": 1.4637589635678379e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2990, - "real_time": 2.3945177154596833e-01, - "cpu_time": 2.3944178795986570e-01, + "iterations": 2247, + "real_time": 2.7769743156104176e-01, + "cpu_time": 2.7768090075656437e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2958, - "real_time": 2.3566706432249998e-01, - "cpu_time": 2.3565873157538894e-01, + "iterations": 2603, + "real_time": 3.3085079827173097e-01, + "cpu_time": 3.3083193046484832e-01, "time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1884, - "real_time": 3.8850708664918909e-01, - "cpu_time": 3.8848084235668817e-01, + "iterations": 1077, + "real_time": 5.8244004112321579e-01, + "cpu_time": 5.8239145868152165e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1811, - "real_time": 3.7958499091491721e-01, - "cpu_time": 3.7957069243511937e-01, + "iterations": 1171, + "real_time": 5.6926543123695605e-01, + "cpu_time": 5.6925410674637089e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 725, - "real_time": 1.0013685658060272e+00, - "cpu_time": 1.0012686234482775e+00, + "iterations": 737, + "real_time": 9.5344758927417739e-01, + "cpu_time": 9.5340984531885853e-01, "time_unit": "ms" }, { @@ -241,12 +241,11 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 686, - "real_time": 9.9782866721771901e-01, - "cpu_time": 9.9774830320699592e-01, + "iterations": 722, + "real_time": 9.4657656001417256e-01, + "cpu_time": 
9.4650936842105216e-01, "time_unit": "ms" - }, - { + } { "name": "OpenCV_Erode2D_Constant_Padding/1", "family_index": 15, "per_family_instance_index": 0, @@ -255,9 +254,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4854, - "real_time": 1.4177612733168027e-01, - "cpu_time": 1.4176570457354778e-01, + "iterations": 4117, + "real_time": 1.6993736472267648e-01, + "cpu_time": 1.6993461015302383e-01, "time_unit": "ms" }, { @@ -269,80 +268,8 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3120, - "real_time": 2.2748470760117739e-01, - "cpu_time": 2.2747433525640967e-01, - "time_unit": "ms" - }, - { - "name": "OpenCV_Closing2D_Constant_Padding/1", - "family_index": 17, - "per_family_instance_index": 0, - "run_name": "OpenCV_Closing2D_Constant_Padding/1", - "run_type": "iteration", - "repetitions": 1, - "repetition_index": 0, - "threads": 1, - "iterations": 3017, - "real_time": 2.2835247798949074e-01, - "cpu_time": 2.2833371925754087e-01, - "time_unit": "ms" - }, - { - "name": "OpenCV_TopHat2D_Constant_Padding/1", - "family_index": 18, - "per_family_instance_index": 0, - "run_name": "OpenCV_TopHat2D_Constant_Padding/1", - "run_type": "iteration", - "repetitions": 1, - "repetition_index": 0, - "threads": 1, - "iterations": 2621, - "real_time": 2.6303982573217766e-01, - "cpu_time": 2.6302844982830992e-01, - "time_unit": "ms" - }, - { - "name": "OpenCV_BottomHat2D_Constant_Padding/1", - "family_index": 19, - "per_family_instance_index": 0, - "run_name": "OpenCV_BottomHat2D_Constant_Padding/1", - "run_type": "iteration", - "repetitions": 1, - "repetition_index": 0, - "threads": 1, - "iterations": 2639, - "real_time": 2.6201856276355667e-01, - "cpu_time": 2.6201338423645221e-01, - "time_unit": "ms" - }, - { - "name": "OpenCV_MorphGrad2D_Constant_Padding/1", - "family_index": 20, - "per_family_instance_index": 0, - "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1", - "run_type": "iteration", - "repetitions": 1, - 
"repetition_index": 0, - "threads": 1, - "iterations": 2588, - "real_time": 2.6833365197758402e-01, - "cpu_time": 2.6831637287480692e-01, - "time_unit": "ms" - }, - { - "name": "OpenCV_Dilate2D_Constant_Padding/1", - "family_index": 21, - "per_family_instance_index": 0, - "run_name": "OpenCV_Dilate2D_Constant_Padding/1", - "run_type": "iteration", - "repetitions": 1, - "repetition_index": 0, - "threads": 1, - "iterations": 4864, - "real_time": 1.4279707791406268e-01, - "cpu_time": 1.4278683675986900e-01, + "iterations": 2816, + "real_time": 2.4825724275698038e-01, + "cpu_time": 2.4825158025568175e-01, "time_unit": "ms" - } - ] -} + } \ No newline at end of file diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log b/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log index 89be8078..7befb623 100644 --- a/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log +++ b/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log @@ -1,4 +1,4 @@ -2025-09-07T13:10:45+00:00 +2025-09-07T14:27:01+00:00 Running ./bin/image-processing-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,51 +6,25 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 2.63, 3.60, 5.43 +Load Average: 3.32, 3.58, 4.54 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
-------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------------------- -Eigen_Convolve2D/1 12.2 ms 12.2 ms 58 -MLIR_Conv2D/1 30.4 ms 30.4 ms 23 -Buddy_Conv2D/1 1.23 ms 1.23 ms 566 -Buddy_Corr2D_Constant_Padding/1 2.44 ms 2.44 ms 287 -OpenCV_Filter2D_Constant_Padding/1 4.28 ms 4.28 ms 163 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.150 ms 0.150 ms 4617 -Buddy_Resize2D_Bilinear_Interpolation/1 0.273 ms 0.273 ms 2561 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 101810 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 48577 -Buddy_Erosion2D_Constant_Padding/1 0.239 ms 0.239 ms 2990 -Buddy_Dilation2D_Constant_Padding/1 0.236 ms 0.236 ms 2958 -Buddy_Opening2D_Constant_Padding/1 0.389 ms 0.388 ms 1884 -Buddy_Closing2D_Constant_Padding/1 0.380 ms 0.380 ms 1811 -Buddy_TopHat2D_Constant_Padding/1 1.00 ms 1.00 ms 725 -Buddy_BottomHat2D_Constant_Padding/1 0.998 ms 0.998 ms 686 -OpenCV_Erode2D_Constant_Padding/1 0.142 ms 0.142 ms 4854 -OpenCV_Opening2D_Constant_Padding/1 0.227 ms 0.227 ms 3120 -OpenCV_Closing2D_Constant_Padding/1 0.228 ms 0.228 ms 3017 -OpenCV_TopHat2D_Constant_Padding/1 0.263 ms 0.263 ms 2621 -OpenCV_BottomHat2D_Constant_Padding/1 0.262 ms 0.262 ms 2639 -OpenCV_MorphGrad2D_Constant_Padding/1 0.268 ms 0.268 ms 2588 -OpenCV_Dilate2D_Constant_Padding/1 0.143 ms 0.143 ms 4864 -Saved PNG file. -Saved PNG file. -Saved PNG file. -Saved PNG file. -Saved PNG file. -Saved PNG file. -Saved PNG file. -Saved PNG file. -Saved PNG file. -Saved PNG file. -Saved PNG file. -Saved PNG file. -Saved PNG file. -Saved PNG file. -Saved PNG file. -Saved PNG file. -Saved PNG file. -Saved PNG file. -Saved PNG file. -Saved PNG file. -Saved PNG file. 
+Eigen_Convolve2D/1 11.9 ms 11.9 ms 59 +MLIR_Conv2D/1 30.5 ms 30.5 ms 23 +Buddy_Conv2D/1 1.29 ms 1.29 ms 562 +Buddy_Corr2D_Constant_Padding/1 2.46 ms 2.46 ms 284 +OpenCV_Filter2D_Constant_Padding/1 4.29 ms 4.29 ms 163 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.148 ms 0.148 ms 4715 +Buddy_Resize2D_Bilinear_Interpolation/1 0.273 ms 0.273 ms 2576 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 101005 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47760 +Buddy_Erosion2D_Constant_Padding/1 0.278 ms 0.278 ms 2247 +Buddy_Dilation2D_Constant_Padding/1 0.331 ms 0.331 ms 2603 +Buddy_Opening2D_Constant_Padding/1 0.582 ms 0.582 ms 1077 +Buddy_Closing2D_Constant_Padding/1 0.569 ms 0.569 ms 1171 +Buddy_TopHat2D_Constant_Padding/1 0.953 ms 0.953 ms 737 +Buddy_BottomHat2D_Constant_Padding/1 0.947 ms 0.947 ms 722 +OpenCV_Erode2D_Constant_Padding/1 0.170 ms 0.170 ms 4117 +OpenCV_Opening2D_Constant_Padding/1 0.248 ms 0.248 ms 2816 diff --git a/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json index f80f74e1..4622ce88 100644 --- a/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json +++ b/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json @@ -273,7 +273,7 @@ "real_time": 2.2559705339059108e-01, "cpu_time": 2.2558599385908198e-01, "time_unit": "ms" - } + }, { "name": "OpenCV_Closing2D_Constant_Padding/1", "family_index": 17, @@ -283,9 +283,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3186, - "real_time": 2.1989222520077220e-01, - "cpu_time": 2.1989068173258070e-01, + "iterations": 3097, + "real_time": 2.2691802094934985e-01, + "cpu_time": 2.2690593283823002e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, 
"threads": 1, - "iterations": 2722, - "real_time": 2.6317579229687349e-01, - "cpu_time": 2.6317380529022705e-01, + "iterations": 2642, + "real_time": 2.6526975668938568e-01, + "cpu_time": 2.6526233232399676e-01, "time_unit": "ms" }, { @@ -311,8 +311,38 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2637, - "real_time": 2.6459206470631269e-01, - "cpu_time": 2.6458999696624969e-01, + "iterations": 2635, + "real_time": 2.6353473653150916e-01, + "cpu_time": 2.6351631650853868e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "family_index": 20, + "per_family_instance_index": 0, + "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2666, + "real_time": 2.6146747557542777e-01, + "cpu_time": 2.6145232070517682e-01, "time_unit": "ms" - } \ No newline at end of file + }, + { + "name": "OpenCV_Dilate2D_Constant_Padding/1", + "family_index": 21, + "per_family_instance_index": 0, + "run_name": "OpenCV_Dilate2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4900, + "real_time": 1.4341449479059298e-01, + "cpu_time": 1.4340792591836732e-01, + "time_unit": "ms" + } + ] +} diff --git a/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log b/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log index 919ab5bf..d5b3ef25 100644 --- a/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log +++ b/test_result/imageprocessing/SSE_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log @@ -28,6 +28,29 @@ Buddy_TopHat2D_Constant_Padding/1 0.954 ms 0.954 ms Buddy_BottomHat2D_Constant_Padding/1 0.956 ms 0.956 ms 712 OpenCV_Erode2D_Constant_Padding/1 0.141 ms 0.141 ms 4941 
OpenCV_Opening2D_Constant_Padding/1 0.226 ms 0.226 ms 3094 -OpenCV_Closing2D_Constant_Padding/1 0.220 ms 0.220 ms 3186 -OpenCV_TopHat2D_Constant_Padding/1 0.263 ms 0.263 ms 2722 -OpenCV_BottomHat2D_Constant_Padding/1 0.265 ms 0.265 ms 2637 +OpenCV_Closing2D_Constant_Padding/1 0.227 ms 0.227 ms 3097 +OpenCV_TopHat2D_Constant_Padding/1 0.265 ms 0.265 ms 2642 +OpenCV_BottomHat2D_Constant_Padding/1 0.264 ms 0.264 ms 2635 +OpenCV_MorphGrad2D_Constant_Padding/1 0.261 ms 0.261 ms 2666 +OpenCV_Dilate2D_Constant_Padding/1 0.143 ms 0.143 ms 4900 +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. diff --git a/test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json index 494e78a9..aea059fb 100644 --- a/test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json +++ b/test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-09-07T13:07:46+00:00", + "date": "2025-09-07T14:24:06+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [3.30029,4.25293,5.99219], + "load_avg": [3.06689,3.66504,4.76025], "library_build_type": "release" }, "benchmarks": [ @@ -46,8 +46,8 @@ "repetition_index": 0, "threads": 1, "iterations": 58, - "real_time": 1.2046421396321264e+01, - "cpu_time": 1.2045839844827588e+01, + "real_time": 1.1998714814926016e+01, + "cpu_time": 1.1997443879310346e+01, "time_unit": "ms" }, { @@ -60,8 +60,8 @@ 
"repetition_index": 0, "threads": 1, "iterations": 23, - "real_time": 3.0488935015771698e+01, - "cpu_time": 3.0488128913043482e+01, + "real_time": 3.0360637961522393e+01, + "cpu_time": 3.0356734521739146e+01, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 311, - "real_time": 2.2170821450339253e+00, - "cpu_time": 2.2169166109324756e+00, + "iterations": 322, + "real_time": 2.1898058237311262e+00, + "cpu_time": 2.1895025559006216e+00, "time_unit": "ms" }, { @@ -87,9 +87,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 361, - "real_time": 1.9300536064229843e+00, - "cpu_time": 1.9299647423822710e+00, + "iterations": 377, + "real_time": 1.8404441384168773e+00, + "cpu_time": 1.8402098037135266e+00, "time_unit": "ms" }, { @@ -101,9 +101,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 245, - "real_time": 2.8560382979256764e+00, - "cpu_time": 2.8558862775510190e+00, + "iterations": 248, + "real_time": 2.8030298680307402e+00, + "cpu_time": 2.8026356330645159e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4623, - "real_time": 1.4924907078996716e-01, - "cpu_time": 1.4924138178671856e-01, + "iterations": 4741, + "real_time": 1.4848858149487670e-01, + "cpu_time": 1.4846917338114329e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2574, - "real_time": 2.7157393339862174e-01, - "cpu_time": 2.7156123815073813e-01, + "iterations": 2547, + "real_time": 2.7368638209609553e-01, + "cpu_time": 2.7364592854338443e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 101787, - "real_time": 6.8410452289965062e-03, - "cpu_time": 6.8409221511587948e-03, + "iterations": 101088, + "real_time": 6.9170411064131193e-03, + "cpu_time": 6.9161112792814165e-03, "time_unit": "ms" }, 
{ @@ -157,9 +157,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 48536, - "real_time": 1.4426544273662984e-02, - "cpu_time": 1.4426188993736632e-02, + "iterations": 47818, + "real_time": 1.4660072789606434e-02, + "cpu_time": 1.4659816428959836e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2911, - "real_time": 2.5185101356182016e-01, - "cpu_time": 2.5184438096873979e-01, + "iterations": 2386, + "real_time": 2.8420215330499615e-01, + "cpu_time": 2.8416078960603486e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2915, - "real_time": 2.4232492489839294e-01, - "cpu_time": 2.4231269228130314e-01, + "iterations": 2544, + "real_time": 2.8090887269555770e-01, + "cpu_time": 2.8086637106918216e-01, "time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1864, - "real_time": 3.9819089791741497e-01, - "cpu_time": 3.9817836051502159e-01, + "iterations": 1000, + "real_time": 6.0656908899545670e-01, + "cpu_time": 6.0655633599999881e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1848, - "real_time": 3.8097190037692263e-01, - "cpu_time": 3.8095753138528204e-01, + "iterations": 1866, + "real_time": 3.7647417416079382e-01, + "cpu_time": 3.7641147481243248e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 696, - "real_time": 9.8893719179363082e-01, - "cpu_time": 9.8892107614942482e-01, + "iterations": 737, + "real_time": 9.6684982406235775e-01, + "cpu_time": 9.6672385888738199e-01, "time_unit": "ms" }, { @@ -241,9 +241,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 672, - "real_time": 9.8550423336703152e-01, - "cpu_time": 9.8546595089285938e-01, + "iterations": 692, + "real_time": 
9.6456302232997271e-01, + "cpu_time": 9.6441563439306299e-01, "time_unit": "ms" }, { @@ -255,9 +255,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4801, - "real_time": 1.5243401774588389e-01, - "cpu_time": 1.5243083399291801e-01, + "iterations": 4842, + "real_time": 1.4419430610637241e-01, + "cpu_time": 1.4419237773647245e-01, "time_unit": "ms" }, { @@ -269,9 +269,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3052, - "real_time": 2.2796939614169096e-01, - "cpu_time": 2.2795512254259492e-01, + "iterations": 3046, + "real_time": 2.3022636120560447e-01, + "cpu_time": 2.3022371799080765e-01, "time_unit": "ms" }, { @@ -283,9 +283,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3093, - "real_time": 2.3295791055483223e-01, - "cpu_time": 2.3295580375040367e-01, + "iterations": 3026, + "real_time": 2.3162924122589751e-01, + "cpu_time": 2.3162562723066799e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2685, - "real_time": 2.6288813108394488e-01, - "cpu_time": 2.6288156722532574e-01, + "iterations": 2617, + "real_time": 2.6727178663208961e-01, + "cpu_time": 2.6726527283148621e-01, "time_unit": "ms" }, { @@ -311,9 +311,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2626, - "real_time": 2.6564019032089381e-01, - "cpu_time": 2.6563777837014435e-01, + "iterations": 2605, + "real_time": 2.6782371547080275e-01, + "cpu_time": 2.6782162571976953e-01, "time_unit": "ms" }, { @@ -325,9 +325,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2529, - "real_time": 2.7600545458239312e-01, - "cpu_time": 2.7599608105970802e-01, + "iterations": 2647, + "real_time": 2.6472585017518091e-01, + "cpu_time": 2.6472381412920259e-01, "time_unit": "ms" }, { @@ -339,9 +339,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4868, - "real_time": 1.4438264488243333e-01, - 
"cpu_time": 1.4438140488907125e-01, + "iterations": 4880, + "real_time": 1.4423252938345807e-01, + "cpu_time": 1.4421351024590140e-01, "time_unit": "ms" } ] diff --git a/test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log b/test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log index 5b86f3a7..91bbb2d4 100644 --- a/test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log +++ b/test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log @@ -1,4 +1,4 @@ -2025-09-07T13:07:46+00:00 +2025-09-07T14:24:06+00:00 Running ./bin/image-processing-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,33 +6,33 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 3.30, 4.25, 5.99 +Load Average: 3.07, 3.67, 4.76 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
-------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------------------- Eigen_Convolve2D/1 12.0 ms 12.0 ms 58 -MLIR_Conv2D/1 30.5 ms 30.5 ms 23 -Buddy_Conv2D/1 2.22 ms 2.22 ms 311 -Buddy_Corr2D_Constant_Padding/1 1.93 ms 1.93 ms 361 -OpenCV_Filter2D_Constant_Padding/1 2.86 ms 2.86 ms 245 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.149 ms 0.149 ms 4623 -Buddy_Resize2D_Bilinear_Interpolation/1 0.272 ms 0.272 ms 2574 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 101787 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 48536 -Buddy_Erosion2D_Constant_Padding/1 0.252 ms 0.252 ms 2911 -Buddy_Dilation2D_Constant_Padding/1 0.242 ms 0.242 ms 2915 -Buddy_Opening2D_Constant_Padding/1 0.398 ms 0.398 ms 1864 -Buddy_Closing2D_Constant_Padding/1 0.381 ms 0.381 ms 1848 -Buddy_TopHat2D_Constant_Padding/1 0.989 ms 0.989 ms 696 -Buddy_BottomHat2D_Constant_Padding/1 0.986 ms 0.985 ms 672 -OpenCV_Erode2D_Constant_Padding/1 0.152 ms 0.152 ms 4801 -OpenCV_Opening2D_Constant_Padding/1 0.228 ms 0.228 ms 3052 -OpenCV_Closing2D_Constant_Padding/1 0.233 ms 0.233 ms 3093 -OpenCV_TopHat2D_Constant_Padding/1 0.263 ms 0.263 ms 2685 -OpenCV_BottomHat2D_Constant_Padding/1 0.266 ms 0.266 ms 2626 -OpenCV_MorphGrad2D_Constant_Padding/1 0.276 ms 0.276 ms 2529 -OpenCV_Dilate2D_Constant_Padding/1 0.144 ms 0.144 ms 4868 +MLIR_Conv2D/1 30.4 ms 30.4 ms 23 +Buddy_Conv2D/1 2.19 ms 2.19 ms 322 +Buddy_Corr2D_Constant_Padding/1 1.84 ms 1.84 ms 377 +OpenCV_Filter2D_Constant_Padding/1 2.80 ms 2.80 ms 248 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.148 ms 0.148 ms 4741 +Buddy_Resize2D_Bilinear_Interpolation/1 0.274 ms 0.274 ms 2547 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 101088 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47818 +Buddy_Erosion2D_Constant_Padding/1 0.284 ms 0.284 ms 
2386 +Buddy_Dilation2D_Constant_Padding/1 0.281 ms 0.281 ms 2544 +Buddy_Opening2D_Constant_Padding/1 0.607 ms 0.607 ms 1000 +Buddy_Closing2D_Constant_Padding/1 0.376 ms 0.376 ms 1866 +Buddy_TopHat2D_Constant_Padding/1 0.967 ms 0.967 ms 737 +Buddy_BottomHat2D_Constant_Padding/1 0.965 ms 0.964 ms 692 +OpenCV_Erode2D_Constant_Padding/1 0.144 ms 0.144 ms 4842 +OpenCV_Opening2D_Constant_Padding/1 0.230 ms 0.230 ms 3046 +OpenCV_Closing2D_Constant_Padding/1 0.232 ms 0.232 ms 3026 +OpenCV_TopHat2D_Constant_Padding/1 0.267 ms 0.267 ms 2617 +OpenCV_BottomHat2D_Constant_Padding/1 0.268 ms 0.268 ms 2605 +OpenCV_MorphGrad2D_Constant_Padding/1 0.265 ms 0.265 ms 2647 +OpenCV_Dilate2D_Constant_Padding/1 0.144 ms 0.144 ms 4880 Saved PNG file. Saved PNG file. Saved PNG file. diff --git a/test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json index 7321fe74..24206b65 100644 --- a/test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json +++ b/test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-09-07T13:08:11+00:00", + "date": "2025-09-07T14:24:30+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [3.32373,4.18311,5.92236], + "load_avg": [3.04297,3.61035,4.71191], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 57, - "real_time": 1.2125408309593535e+01, - "cpu_time": 1.2124991000000001e+01, + "iterations": 58, + "real_time": 1.2075262786499385e+01, + "cpu_time": 1.2073443568965518e+01, "time_unit": "ms" }, { @@ -60,8 +60,8 @@ "repetition_index": 0, "threads": 1, "iterations": 23, - "real_time": 
3.0467491596937180e+01, - "cpu_time": 3.0466511304347815e+01, + "real_time": 3.0510915362316631e+01, + "cpu_time": 3.0506920913043473e+01, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 308, - "real_time": 2.2320119518931811e+00, - "cpu_time": 2.2319393344155842e+00, + "iterations": 314, + "real_time": 2.1884666672747608e+00, + "cpu_time": 2.1881547292993626e+00, "time_unit": "ms" }, { @@ -87,9 +87,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 374, - "real_time": 1.8465650133430001e+00, - "cpu_time": 1.8464962647058809e+00, + "iterations": 381, + "real_time": 1.8304030354723844e+00, + "cpu_time": 1.8301742624671924e+00, "time_unit": "ms" }, { @@ -101,9 +101,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 249, - "real_time": 2.7999833196281907e+00, - "cpu_time": 2.7998932971887567e+00, + "iterations": 250, + "real_time": 2.8018697649240494e+00, + "cpu_time": 2.8014706559999993e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4622, - "real_time": 1.5002440166313480e-01, - "cpu_time": 1.5001801579402854e-01, + "iterations": 4721, + "real_time": 1.4815152991855834e-01, + "cpu_time": 1.4813229760643945e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2559, - "real_time": 2.7215578194799345e-01, - "cpu_time": 2.7214380500195384e-01, + "iterations": 2535, + "real_time": 2.7588403965594499e-01, + "cpu_time": 2.7584648875739659e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 101510, - "real_time": 6.8781714849361188e-03, - "cpu_time": 6.8779688700620614e-03, + "iterations": 101044, + "real_time": 6.9154953151760795e-03, + "cpu_time": 6.9154101480543182e-03, "time_unit": "ms" }, { @@ -157,9 +157,9 @@ "repetitions": 1, "repetition_index": 0, 
"threads": 1, - "iterations": 48333, - "real_time": 1.4528235841763767e-02, - "cpu_time": 1.4527689549583108e-02, + "iterations": 47923, + "real_time": 1.4632370029027023e-02, + "cpu_time": 1.4632033240823825e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2975, - "real_time": 2.4345158654100754e-01, - "cpu_time": 2.4343695798319329e-01, + "iterations": 1991, + "real_time": 3.2995011431077570e-01, + "cpu_time": 3.2994335409342052e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2932, - "real_time": 2.4039734226091172e-01, - "cpu_time": 2.4038767871759889e-01, + "iterations": 1915, + "real_time": 2.8319770066607403e-01, + "cpu_time": 2.8319159164490931e-01, "time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1773, - "real_time": 3.8597488842034705e-01, - "cpu_time": 3.8596146587704461e-01, + "iterations": 1138, + "real_time": 5.3577692049877923e-01, + "cpu_time": 5.3576669420035217e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1877, - "real_time": 3.7591625749270968e-01, - "cpu_time": 3.7591039158231215e-01, + "iterations": 1124, + "real_time": 4.9952752357912233e-01, + "cpu_time": 4.9943513434163772e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 708, - "real_time": 9.9595875537159750e-01, - "cpu_time": 9.9593150423728927e-01, + "iterations": 719, + "real_time": 9.1689284657735648e-01, + "cpu_time": 9.1676078859526888e-01, "time_unit": "ms" }, { @@ -241,9 +241,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 677, - "real_time": 9.7564913805689735e-01, - "cpu_time": 9.7561057311669175e-01, + "iterations": 695, + "real_time": 9.6813468731564578e-01, + "cpu_time": 9.6800758705035905e-01, "time_unit": 
"ms" }, { @@ -255,9 +255,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4933, - "real_time": 1.4562633877082792e-01, - "cpu_time": 1.4562218649908790e-01, + "iterations": 4798, + "real_time": 1.4492751205151355e-01, + "cpu_time": 1.4491701792413500e-01, "time_unit": "ms" }, { @@ -269,9 +269,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3075, - "real_time": 2.2444998709166922e-01, - "cpu_time": 2.2444326016260174e-01, + "iterations": 2989, + "real_time": 2.3426035498248649e-01, + "cpu_time": 2.3425851689528218e-01, "time_unit": "ms" }, { @@ -283,9 +283,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3052, - "real_time": 2.3302362516502878e-01, - "cpu_time": 2.3301645871559634e-01, + "iterations": 2969, + "real_time": 2.3408840153505603e-01, + "cpu_time": 2.3408375345234064e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2673, - "real_time": 2.6300297809637224e-01, - "cpu_time": 2.6299144257388679e-01, + "iterations": 2614, + "real_time": 2.6729449133838329e-01, + "cpu_time": 2.6729081216526446e-01, "time_unit": "ms" }, { @@ -311,9 +311,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2683, - "real_time": 2.5624285049061701e-01, - "cpu_time": 2.5623349832277248e-01, + "iterations": 2630, + "real_time": 2.6423307454404721e-01, + "cpu_time": 2.6422895171102773e-01, "time_unit": "ms" }, { @@ -325,9 +325,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2594, - "real_time": 2.6831575996065105e-01, - "cpu_time": 2.6831080185042405e-01, + "iterations": 2626, + "real_time": 2.7601832562244671e-01, + "cpu_time": 2.7601385605483625e-01, "time_unit": "ms" }, { @@ -339,9 +339,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4916, - "real_time": 1.4157932588150096e-01, - "cpu_time": 1.4157454414157802e-01, + "iterations": 4881, + "real_time": 
1.4363530366129135e-01, + "cpu_time": 1.4361626715836887e-01, "time_unit": "ms" } ] diff --git a/test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log b/test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log index eb8ac2f9..bba83998 100644 --- a/test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log +++ b/test_result/imageprocessing/SSE_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log @@ -1,4 +1,4 @@ -2025-09-07T13:08:11+00:00 +2025-09-07T14:24:30+00:00 Running ./bin/image-processing-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,33 +6,33 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 3.32, 4.18, 5.92 +Load Average: 3.04, 3.61, 4.71 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. -------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------------------- -Eigen_Convolve2D/1 12.1 ms 12.1 ms 57 +Eigen_Convolve2D/1 12.1 ms 12.1 ms 58 MLIR_Conv2D/1 30.5 ms 30.5 ms 23 -Buddy_Conv2D/1 2.23 ms 2.23 ms 308 -Buddy_Corr2D_Constant_Padding/1 1.85 ms 1.85 ms 374 -OpenCV_Filter2D_Constant_Padding/1 2.80 ms 2.80 ms 249 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.150 ms 0.150 ms 4622 -Buddy_Resize2D_Bilinear_Interpolation/1 0.272 ms 0.272 ms 2559 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 101510 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 48333 -Buddy_Erosion2D_Constant_Padding/1 0.243 ms 0.243 ms 2975 -Buddy_Dilation2D_Constant_Padding/1 0.240 ms 0.240 ms 2932 -Buddy_Opening2D_Constant_Padding/1 0.386 ms 0.386 ms 1773 -Buddy_Closing2D_Constant_Padding/1 0.376 ms 0.376 ms 1877 
-Buddy_TopHat2D_Constant_Padding/1 0.996 ms 0.996 ms 708 -Buddy_BottomHat2D_Constant_Padding/1 0.976 ms 0.976 ms 677 -OpenCV_Erode2D_Constant_Padding/1 0.146 ms 0.146 ms 4933 -OpenCV_Opening2D_Constant_Padding/1 0.224 ms 0.224 ms 3075 -OpenCV_Closing2D_Constant_Padding/1 0.233 ms 0.233 ms 3052 -OpenCV_TopHat2D_Constant_Padding/1 0.263 ms 0.263 ms 2673 -OpenCV_BottomHat2D_Constant_Padding/1 0.256 ms 0.256 ms 2683 -OpenCV_MorphGrad2D_Constant_Padding/1 0.268 ms 0.268 ms 2594 -OpenCV_Dilate2D_Constant_Padding/1 0.142 ms 0.142 ms 4916 +Buddy_Conv2D/1 2.19 ms 2.19 ms 314 +Buddy_Corr2D_Constant_Padding/1 1.83 ms 1.83 ms 381 +OpenCV_Filter2D_Constant_Padding/1 2.80 ms 2.80 ms 250 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.148 ms 0.148 ms 4721 +Buddy_Resize2D_Bilinear_Interpolation/1 0.276 ms 0.276 ms 2535 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 101044 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47923 +Buddy_Erosion2D_Constant_Padding/1 0.330 ms 0.330 ms 1991 +Buddy_Dilation2D_Constant_Padding/1 0.283 ms 0.283 ms 1915 +Buddy_Opening2D_Constant_Padding/1 0.536 ms 0.536 ms 1138 +Buddy_Closing2D_Constant_Padding/1 0.500 ms 0.499 ms 1124 +Buddy_TopHat2D_Constant_Padding/1 0.917 ms 0.917 ms 719 +Buddy_BottomHat2D_Constant_Padding/1 0.968 ms 0.968 ms 695 +OpenCV_Erode2D_Constant_Padding/1 0.145 ms 0.145 ms 4798 +OpenCV_Opening2D_Constant_Padding/1 0.234 ms 0.234 ms 2989 +OpenCV_Closing2D_Constant_Padding/1 0.234 ms 0.234 ms 2969 +OpenCV_TopHat2D_Constant_Padding/1 0.267 ms 0.267 ms 2614 +OpenCV_BottomHat2D_Constant_Padding/1 0.264 ms 0.264 ms 2630 +OpenCV_MorphGrad2D_Constant_Padding/1 0.276 ms 0.276 ms 2626 +OpenCV_Dilate2D_Constant_Padding/1 0.144 ms 0.144 ms 4881 Saved PNG file. Saved PNG file. Saved PNG file. 
diff --git a/test_result/imageprocessing/image-processing-result.log b/test_result/imageprocessing/image-processing-result.log index b270fff5..e709d0c2 100644 --- a/test_result/imageprocessing/image-processing-result.log +++ b/test_result/imageprocessing/image-processing-result.log @@ -47,3 +47,41 @@ Running: ../benchmarks/ImageProcessing/Images/YuTu.png laplacianKernelAlign rand Running: ../benchmarks/ImageProcessing/Images/YuTu.png laplacianKernelAlign random3x3KernelAlignInt REPLICATE_PADDING [Success] … Running: ../benchmarks/ImageProcessing/Images/YuTu.png laplacianKernelAlign random3x3KernelAlignInt REPLICATE_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png logKernelAlign random3x3KernelAlignInt CONSTANT_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png logKernelAlign random3x3KernelAlignInt CONSTANT_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png logKernelAlign random3x3KernelAlignInt REPLICATE_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png logKernelAlign random3x3KernelAlignInt REPLICATE_PADDING +[Success] … +Testing AVX2 support +AVX2 is supported. +[Success] … +Testing AVX2 support +AVX2 is supported. 
+Running image-processing-benchmark for AVX2 +Running: ../benchmarks/ImageProcessing/Images/YuTu.png prewittKernelAlign random3x3KernelAlignInt CONSTANT_PADDING +Running image-processing-benchmark for AVX2 +Running: ../benchmarks/ImageProcessing/Images/YuTu.png prewittKernelAlign random3x3KernelAlignInt CONSTANT_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png prewittKernelAlign random3x3KernelAlignInt REPLICATE_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png prewittKernelAlign random3x3KernelAlignInt REPLICATE_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel3x3KernelAlign random3x3KernelAlignInt CONSTANT_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel3x3KernelAlign random3x3KernelAlignInt CONSTANT_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel3x3KernelAlign random3x3KernelAlignInt REPLICATE_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel3x3KernelAlign random3x3KernelAlignInt REPLICATE_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel5x5KernelAlign random3x3KernelAlignInt CONSTANT_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel5x5KernelAlign random3x3KernelAlignInt CONSTANT_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel5x5KernelAlign random3x3KernelAlignInt REPLICATE_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel5x5KernelAlign random3x3KernelAlignInt REPLICATE_PADDING From bfc79a85d0bc72b227b446b4989b3c56633c4883 Mon Sep 17 00:00:00 2001 From: Q Liu <52538137+LIUQyou@users.noreply.github.com> Date: Sun, 7 Sep 2025 16:36:18 +0200 Subject: [PATCH 49/52] update --- .github/workflows/bench.yml | 7 +- ...dom3x3KernelAlignInt_CONSTANT_PADDING.json | 136 ++++++++--------- ...ndom3x3KernelAlignInt_CONSTANT_PADDING.log | 48 +++--- 
...om3x3KernelAlignInt_REPLICATE_PADDING.json | 137 +++++++++--------- ...dom3x3KernelAlignInt_REPLICATE_PADDING.log | 48 +++--- ...dom3x3KernelAlignInt_CONSTANT_PADDING.json | 134 ++++++++--------- ...ndom3x3KernelAlignInt_CONSTANT_PADDING.log | 48 +++--- ...om3x3KernelAlignInt_REPLICATE_PADDING.json | 130 ++++++++--------- ...dom3x3KernelAlignInt_REPLICATE_PADDING.log | 63 ++++---- ...om3x3KernelAlignInt_REPLICATE_PADDING.json | 89 +++++++++++- ...dom3x3KernelAlignInt_REPLICATE_PADDING.log | 30 +++- ...dom3x3KernelAlignInt_CONSTANT_PADDING.json | 128 ++++++++-------- ...ndom3x3KernelAlignInt_CONSTANT_PADDING.log | 46 +++--- ...om3x3KernelAlignInt_REPLICATE_PADDING.json | 132 ++++++++--------- ...dom3x3KernelAlignInt_REPLICATE_PADDING.log | 46 +++--- ...dom3x3KernelAlignInt_CONSTANT_PADDING.json | 132 ++++++++--------- ...ndom3x3KernelAlignInt_CONSTANT_PADDING.log | 48 +++--- ...om3x3KernelAlignInt_REPLICATE_PADDING.json | 132 ++++++++--------- ...dom3x3KernelAlignInt_REPLICATE_PADDING.log | 48 +++--- .../image-processing-result.log | 42 ++++++ .../vectorization/vectorization_matrix.json | 16 +- .../vectorization/vectorization_matrix.log | 8 +- .../vectorization/vectorization_result.log | 54 ++++--- 23 files changed, 928 insertions(+), 774 deletions(-) diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml index 141e9a4c..77ae1b32 100755 --- a/.github/workflows/bench.yml +++ b/.github/workflows/bench.yml @@ -85,11 +85,7 @@ jobs: # ------------------------------------------------------------ # 4) make /benchmarks/ point to the most recent run as well # ------------------------------------------------------------ - - name: Add top-level benchmarks index - working-directory: /home/quliu/buddy-complier-workspace/buddy-benchmark/site - run: | - set -e - printf '%s\n' "" > benchmarks/index.html + # (Removed) previous redirect-only index; replaced by full listing below - name: Upload site artifact uses: actions/upload-pages-artifact@v3 @@ -132,6 +128,7 @@ 
jobs: '---' \ 'layout: default' \ 'title: Benchmarks' \ + 'nav_exclude: true' \ '---' \ '

          Benchmark runs

          ' \ '

          Select a date and commit:

          ' diff --git a/test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json index 7e3224f1..937dc3e3 100644 --- a/test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json +++ b/test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-09-07T13:12:49+00:00", + "date": "2025-09-07T14:29:02+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [3.02686,3.38477,5.12598], + "load_avg": [3.04053,3.38672,4.34863], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 137, - "real_time": 5.0879708715598948e+00, - "cpu_time": 5.0878425036496351e+00, + "iterations": 140, + "real_time": 4.9702270754746030e+00, + "cpu_time": 4.9699425000000002e+00, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 93, - "real_time": 7.6264414575792125e+00, - "cpu_time": 7.6260674086021520e+00, + "iterations": 92, + "real_time": 7.5326938101130985e+00, + "cpu_time": 7.5326059673913059e+00, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1666, - "real_time": 4.2049797410581435e-01, - "cpu_time": 4.2046406542617032e-01, + "iterations": 1614, + "real_time": 4.3082923906517145e-01, + "cpu_time": 4.3081728562577470e-01, "time_unit": "ms" }, { @@ -87,9 +87,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 638, - "real_time": 1.1059913177008165e+00, - "cpu_time": 1.1059258667711596e+00, + "iterations": 624, + "real_time": 1.1141470943888028e+00, + "cpu_time": 
1.1141273092948720e+00, "time_unit": "ms" }, { @@ -101,9 +101,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 361, - "real_time": 1.9581975348273142e+00, - "cpu_time": 1.9581129722991706e+00, + "iterations": 359, + "real_time": 1.9465408347609316e+00, + "cpu_time": 1.9465103370473527e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4637, - "real_time": 1.5017545063202048e-01, - "cpu_time": 1.5017112659046808e-01, + "iterations": 4753, + "real_time": 1.4783761252635608e-01, + "cpu_time": 1.4782938796549550e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2576, - "real_time": 2.7208778318827564e-01, - "cpu_time": 2.7207776824534169e-01, + "iterations": 2588, + "real_time": 2.7286772899778772e-01, + "cpu_time": 2.7285900193199364e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 102256, - "real_time": 7.4423461665179763e-03, - "cpu_time": 7.4419053551869818e-03, + "iterations": 101129, + "real_time": 6.9313498568830253e-03, + "cpu_time": 6.9310267381265502e-03, "time_unit": "ms" }, { @@ -157,9 +157,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 48431, - "real_time": 1.4448959287854052e-02, - "cpu_time": 1.4448407239164986e-02, + "iterations": 47932, + "real_time": 1.4616750139684954e-02, + "cpu_time": 1.4616182466828002e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2937, - "real_time": 2.5159487000403047e-01, - "cpu_time": 2.5157496322778355e-01, + "iterations": 1876, + "real_time": 3.1442655476807024e-01, + "cpu_time": 3.1440550479744134e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2951, - "real_time": 2.5003220001062026e-01, - "cpu_time": 
2.5001731955269441e-01, + "iterations": 2598, + "real_time": 3.2269134486611756e-01, + "cpu_time": 3.2267296497305642e-01, "time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1844, - "real_time": 4.3209650656794263e-01, - "cpu_time": 4.3208606724511922e-01, + "iterations": 1000, + "real_time": 5.9975032135844231e-01, + "cpu_time": 5.9970746099999950e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1576, - "real_time": 4.0304744237948797e-01, - "cpu_time": 4.0303471763959337e-01, + "iterations": 1107, + "real_time": 5.4013467281922201e-01, + "cpu_time": 5.4010315356820238e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 672, - "real_time": 9.8597416875972632e-01, - "cpu_time": 9.8593559821428622e-01, + "iterations": 670, + "real_time": 9.5614417275386065e-01, + "cpu_time": 9.5608898805969977e-01, "time_unit": "ms" }, { @@ -241,9 +241,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 661, - "real_time": 9.8078462905313890e-01, - "cpu_time": 9.8075158850226873e-01, + "iterations": 701, + "real_time": 9.5555469521272196e-01, + "cpu_time": 9.5552639372325310e-01, "time_unit": "ms" }, { @@ -255,9 +255,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4899, - "real_time": 1.4225564095579282e-01, - "cpu_time": 1.4225357236170624e-01, + "iterations": 4869, + "real_time": 1.4373984491587616e-01, + "cpu_time": 1.4373398007804475e-01, "time_unit": "ms" }, { @@ -269,9 +269,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3062, - "real_time": 2.2066803679483377e-01, - "cpu_time": 2.2066302024820375e-01, + "iterations": 3020, + "real_time": 2.3255034763094606e-01, + "cpu_time": 2.3254246953642435e-01, "time_unit": "ms" }, { @@ -283,9 +283,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - 
"iterations": 3062, - "real_time": 2.2651180065316676e-01, - "cpu_time": 2.2650612083605487e-01, + "iterations": 3017, + "real_time": 2.3421093841281837e-01, + "cpu_time": 2.3420040371229658e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2652, - "real_time": 2.6190968519736918e-01, - "cpu_time": 2.6190097134238283e-01, + "iterations": 2570, + "real_time": 2.7231803663973680e-01, + "cpu_time": 2.7231148404669192e-01, "time_unit": "ms" }, { @@ -311,9 +311,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2671, - "real_time": 2.6485771191829782e-01, - "cpu_time": 2.6484863721452689e-01, + "iterations": 2568, + "real_time": 2.7264940416051592e-01, + "cpu_time": 2.7263315109034342e-01, "time_unit": "ms" }, { @@ -325,9 +325,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2662, - "real_time": 2.6326207129199375e-01, - "cpu_time": 2.6325002854996282e-01, + "iterations": 2637, + "real_time": 2.6645213591586836e-01, + "cpu_time": 2.6643527948426282e-01, "time_unit": "ms" }, { @@ -339,9 +339,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4870, - "real_time": 1.4466622373896212e-01, - "cpu_time": 1.4466395708418861e-01, + "iterations": 4955, + "real_time": 1.4140665997289625e-01, + "cpu_time": 1.4139799172553003e-01, "time_unit": "ms" } ] diff --git a/test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log b/test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log index 3e35d00b..0aab2d9d 100644 --- a/test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log +++ b/test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log @@ -1,4 +1,4 @@ -2025-09-07T13:12:49+00:00 +2025-09-07T14:29:02+00:00 Running ./bin/image-processing-benchmark Run on (24 
X 5100 MHz CPU s) CPU Caches: @@ -6,33 +6,33 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 3.03, 3.38, 5.13 +Load Average: 3.04, 3.39, 4.35 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. -------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------------------- -Eigen_Convolve2D/1 5.09 ms 5.09 ms 137 -MLIR_Conv2D/1 7.63 ms 7.63 ms 93 -Buddy_Conv2D/1 0.420 ms 0.420 ms 1666 -Buddy_Corr2D_Constant_Padding/1 1.11 ms 1.11 ms 638 -OpenCV_Filter2D_Constant_Padding/1 1.96 ms 1.96 ms 361 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.150 ms 0.150 ms 4637 -Buddy_Resize2D_Bilinear_Interpolation/1 0.272 ms 0.272 ms 2576 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 102256 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 48431 -Buddy_Erosion2D_Constant_Padding/1 0.252 ms 0.252 ms 2937 -Buddy_Dilation2D_Constant_Padding/1 0.250 ms 0.250 ms 2951 -Buddy_Opening2D_Constant_Padding/1 0.432 ms 0.432 ms 1844 -Buddy_Closing2D_Constant_Padding/1 0.403 ms 0.403 ms 1576 -Buddy_TopHat2D_Constant_Padding/1 0.986 ms 0.986 ms 672 -Buddy_BottomHat2D_Constant_Padding/1 0.981 ms 0.981 ms 661 -OpenCV_Erode2D_Constant_Padding/1 0.142 ms 0.142 ms 4899 -OpenCV_Opening2D_Constant_Padding/1 0.221 ms 0.221 ms 3062 -OpenCV_Closing2D_Constant_Padding/1 0.227 ms 0.227 ms 3062 -OpenCV_TopHat2D_Constant_Padding/1 0.262 ms 0.262 ms 2652 -OpenCV_BottomHat2D_Constant_Padding/1 0.265 ms 0.265 ms 2671 -OpenCV_MorphGrad2D_Constant_Padding/1 0.263 ms 0.263 ms 2662 -OpenCV_Dilate2D_Constant_Padding/1 0.145 ms 0.145 ms 4870 +Eigen_Convolve2D/1 4.97 ms 4.97 ms 140 +MLIR_Conv2D/1 7.53 ms 7.53 ms 92 +Buddy_Conv2D/1 0.431 ms 0.431 ms 1614 +Buddy_Corr2D_Constant_Padding/1 1.11 ms 1.11 ms 624 
+OpenCV_Filter2D_Constant_Padding/1 1.95 ms 1.95 ms 359 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.148 ms 0.148 ms 4753 +Buddy_Resize2D_Bilinear_Interpolation/1 0.273 ms 0.273 ms 2588 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 101129 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47932 +Buddy_Erosion2D_Constant_Padding/1 0.314 ms 0.314 ms 1876 +Buddy_Dilation2D_Constant_Padding/1 0.323 ms 0.323 ms 2598 +Buddy_Opening2D_Constant_Padding/1 0.600 ms 0.600 ms 1000 +Buddy_Closing2D_Constant_Padding/1 0.540 ms 0.540 ms 1107 +Buddy_TopHat2D_Constant_Padding/1 0.956 ms 0.956 ms 670 +Buddy_BottomHat2D_Constant_Padding/1 0.956 ms 0.956 ms 701 +OpenCV_Erode2D_Constant_Padding/1 0.144 ms 0.144 ms 4869 +OpenCV_Opening2D_Constant_Padding/1 0.233 ms 0.233 ms 3020 +OpenCV_Closing2D_Constant_Padding/1 0.234 ms 0.234 ms 3017 +OpenCV_TopHat2D_Constant_Padding/1 0.272 ms 0.272 ms 2570 +OpenCV_BottomHat2D_Constant_Padding/1 0.273 ms 0.273 ms 2568 +OpenCV_MorphGrad2D_Constant_Padding/1 0.266 ms 0.266 ms 2637 +OpenCV_Dilate2D_Constant_Padding/1 0.141 ms 0.141 ms 4955 Saved PNG file. Saved PNG file. Saved PNG file. 
diff --git a/test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json index 50bf9daa..825c9e79 100644 --- a/test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json +++ b/test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-09-07T13:13:14+00:00", + "date": "2025-09-07T14:29:26+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [2.80664,3.30371,5.05225], + "load_avg": [3.02539,3.35449,4.31201], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 140, - "real_time": 4.9840724893978665e+00, - "cpu_time": 4.9838604142857141e+00, + "iterations": 109, + "real_time": 4.9761295523665368e+00, + "cpu_time": 4.9760039174311919e+00, "time_unit": "ms" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 92, - "real_time": 7.5595184917683182e+00, - "cpu_time": 7.5592466739130435e+00, + "iterations": 93, + "real_time": 7.5348360083436452e+00, + "cpu_time": 7.5346214623655925e+00, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1608, - "real_time": 4.3011411551886530e-01, - "cpu_time": 4.3009694029850742e-01, + "iterations": 1613, + "real_time": 4.3241579896237492e-01, + "cpu_time": 4.3240159950402979e-01, "time_unit": "ms" }, { @@ -87,9 +87,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 630, - "real_time": 1.1155422008226787e+00, - "cpu_time": 1.1154895079365073e+00, + "iterations": 621, + "real_time": 1.1201563460719375e+00, + "cpu_time": 
1.1201345571658614e+00, "time_unit": "ms" }, { @@ -101,9 +101,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 359, - "real_time": 1.9513797481910100e+00, - "cpu_time": 1.9512984373259061e+00, + "iterations": 358, + "real_time": 1.9431075591115312e+00, + "cpu_time": 1.9430616452513958e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4613, - "real_time": 1.5032416584691746e-01, - "cpu_time": 1.5031801712551474e-01, + "iterations": 4730, + "real_time": 1.4787029561608336e-01, + "cpu_time": 1.4785987547568707e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2556, - "real_time": 2.7321518365299569e-01, - "cpu_time": 2.7320184233176831e-01, + "iterations": 2582, + "real_time": 2.7276169563521718e-01, + "cpu_time": 2.7275317970565449e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 101295, - "real_time": 6.8678814235412153e-03, - "cpu_time": 6.8675380719680160e-03, + "iterations": 100345, + "real_time": 6.9529070208036907e-03, + "cpu_time": 6.9522148388061148e-03, "time_unit": "ms" }, { @@ -157,9 +157,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 48397, - "real_time": 1.4453964790490097e-02, - "cpu_time": 1.4453691468479458e-02, + "iterations": 47937, + "real_time": 1.4593899464516223e-02, + "cpu_time": 1.4593418570206736e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2868, - "real_time": 2.4664656855321829e-01, - "cpu_time": 2.4663631938633196e-01, + "iterations": 2390, + "real_time": 2.9568342203625075e-01, + "cpu_time": 2.9566401338912135e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2836, - "real_time": 2.4140081453768927e-01, - "cpu_time": 
2.4138928561354012e-01, + "iterations": 2256, + "real_time": 2.9021860859918258e-01, + "cpu_time": 2.9020803501773085e-01, "time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1792, - "real_time": 3.8493265414477457e-01, - "cpu_time": 3.8491138560267846e-01, + "iterations": 882, + "real_time": 6.7151636898923084e-01, + "cpu_time": 6.7144004875283347e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1804, - "real_time": 3.9144566162858990e-01, - "cpu_time": 3.9143657760532191e-01, + "iterations": 1000, + "real_time": 6.1067149415612221e-01, + "cpu_time": 6.1064459099999979e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 706, - "real_time": 1.0073629099738497e+00, - "cpu_time": 1.0073418994334273e+00, + "iterations": 460, + "real_time": 1.3535791765088621e+00, + "cpu_time": 1.3534489391304345e+00, "time_unit": "ms" }, { @@ -241,9 +241,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 664, - "real_time": 1.0099171279334878e+00, - "cpu_time": 1.0098988192771083e+00, + "iterations": 753, + "real_time": 9.4793929877984096e-01, + "cpu_time": 9.4790966268260402e-01, "time_unit": "ms" }, { @@ -255,9 +255,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3883, - "real_time": 1.5074242133778640e-01, - "cpu_time": 1.5073515297450438e-01, + "iterations": 4882, + "real_time": 1.4292015489961513e-01, + "cpu_time": 1.4291047501024184e-01, "time_unit": "ms" }, { @@ -269,9 +269,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3136, - "real_time": 2.1938790391408364e-01, - "cpu_time": 2.1937540688775489e-01, + "iterations": 3137, + "real_time": 2.2372837479151775e-01, + "cpu_time": 2.2371705769843761e-01, "time_unit": "ms" }, { @@ -283,9 +283,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - 
"iterations": 3187, - "real_time": 2.2193600938243105e-01, - "cpu_time": 2.2192187480388995e-01, + "iterations": 3091, + "real_time": 2.2564606758546227e-01, + "cpu_time": 2.2562969718537662e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2728, - "real_time": 2.6235867925316009e-01, - "cpu_time": 2.6234814479472190e-01, + "iterations": 2642, + "real_time": 2.6471110273725063e-01, + "cpu_time": 2.6470357607872869e-01, "time_unit": "ms" }, { @@ -311,9 +311,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2717, - "real_time": 2.6013763031189086e-01, - "cpu_time": 2.6012419690835509e-01, + "iterations": 2626, + "real_time": 2.7250474842331923e-01, + "cpu_time": 2.7248917098248282e-01, "time_unit": "ms" }, { @@ -325,9 +325,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2635, - "real_time": 2.6362094130416525e-01, - "cpu_time": 2.6361262201138502e-01, + "iterations": 2667, + "real_time": 2.6142182346135878e-01, + "cpu_time": 2.6141412335957992e-01, "time_unit": "ms" }, { @@ -339,10 +339,11 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4736, - "real_time": 1.4790402162140487e-01, - "cpu_time": 1.4790140983952701e-01, + "iterations": 4904, + "real_time": 1.4250244045870145e-01, + "cpu_time": 1.4249685929853156e-01, "time_unit": "ms" } ] } + diff --git a/test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log b/test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log index 9de080dd..97464ce6 100644 --- a/test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log +++ b/test_result/imageprocessing/AVX2_YuTu_laplacianKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log @@ -1,4 +1,4 @@ -2025-09-07T13:13:14+00:00 +2025-09-07T14:29:26+00:00 Running ./bin/image-processing-benchmark 
Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,33 +6,33 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 2.81, 3.30, 5.05 +Load Average: 3.03, 3.35, 4.31 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. -------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------------------- -Eigen_Convolve2D/1 4.98 ms 4.98 ms 140 -MLIR_Conv2D/1 7.56 ms 7.56 ms 92 -Buddy_Conv2D/1 0.430 ms 0.430 ms 1608 -Buddy_Corr2D_Constant_Padding/1 1.12 ms 1.12 ms 630 -OpenCV_Filter2D_Constant_Padding/1 1.95 ms 1.95 ms 359 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.150 ms 0.150 ms 4613 -Buddy_Resize2D_Bilinear_Interpolation/1 0.273 ms 0.273 ms 2556 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 101295 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 48397 -Buddy_Erosion2D_Constant_Padding/1 0.247 ms 0.247 ms 2868 -Buddy_Dilation2D_Constant_Padding/1 0.241 ms 0.241 ms 2836 -Buddy_Opening2D_Constant_Padding/1 0.385 ms 0.385 ms 1792 -Buddy_Closing2D_Constant_Padding/1 0.391 ms 0.391 ms 1804 -Buddy_TopHat2D_Constant_Padding/1 1.01 ms 1.01 ms 706 -Buddy_BottomHat2D_Constant_Padding/1 1.01 ms 1.01 ms 664 -OpenCV_Erode2D_Constant_Padding/1 0.151 ms 0.151 ms 3883 -OpenCV_Opening2D_Constant_Padding/1 0.219 ms 0.219 ms 3136 -OpenCV_Closing2D_Constant_Padding/1 0.222 ms 0.222 ms 3187 -OpenCV_TopHat2D_Constant_Padding/1 0.262 ms 0.262 ms 2728 -OpenCV_BottomHat2D_Constant_Padding/1 0.260 ms 0.260 ms 2717 -OpenCV_MorphGrad2D_Constant_Padding/1 0.264 ms 0.264 ms 2635 -OpenCV_Dilate2D_Constant_Padding/1 0.148 ms 0.148 ms 4736 +Eigen_Convolve2D/1 4.98 ms 4.98 ms 109 +MLIR_Conv2D/1 7.53 ms 7.53 ms 93 +Buddy_Conv2D/1 0.432 ms 0.432 ms 1613 +Buddy_Corr2D_Constant_Padding/1 1.12 ms 1.12 ms 621 
+OpenCV_Filter2D_Constant_Padding/1 1.94 ms 1.94 ms 358 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.148 ms 0.148 ms 4730 +Buddy_Resize2D_Bilinear_Interpolation/1 0.273 ms 0.273 ms 2582 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 100345 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47937 +Buddy_Erosion2D_Constant_Padding/1 0.296 ms 0.296 ms 2390 +Buddy_Dilation2D_Constant_Padding/1 0.290 ms 0.290 ms 2256 +Buddy_Opening2D_Constant_Padding/1 0.672 ms 0.671 ms 882 +Buddy_Closing2D_Constant_Padding/1 0.611 ms 0.611 ms 1000 +Buddy_TopHat2D_Constant_Padding/1 1.35 ms 1.35 ms 460 +Buddy_BottomHat2D_Constant_Padding/1 0.948 ms 0.948 ms 753 +OpenCV_Erode2D_Constant_Padding/1 0.143 ms 0.143 ms 4882 +OpenCV_Opening2D_Constant_Padding/1 0.224 ms 0.224 ms 3137 +OpenCV_Closing2D_Constant_Padding/1 0.226 ms 0.226 ms 3091 +OpenCV_TopHat2D_Constant_Padding/1 0.265 ms 0.265 ms 2642 +OpenCV_BottomHat2D_Constant_Padding/1 0.273 ms 0.272 ms 2626 +OpenCV_MorphGrad2D_Constant_Padding/1 0.261 ms 0.261 ms 2667 +OpenCV_Dilate2D_Constant_Padding/1 0.143 ms 0.142 ms 4904 Saved PNG file. Saved PNG file. Saved PNG file. 
diff --git a/test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json index 448cffd8..1dc67624 100644 --- a/test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json +++ b/test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-09-07T13:13:38+00:00", + "date": "2025-09-07T14:29:50+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [2.88232,3.27881,4.99609], + "load_avg": [3.01562,3.32471,4.27539], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 57, - "real_time": 1.2154607396376761e+01, - "cpu_time": 1.2154355473684211e+01, + "iterations": 52, + "real_time": 1.1901946451801519e+01, + "cpu_time": 1.1901702288461539e+01, "time_unit": "ms" }, { @@ -60,8 +60,8 @@ "repetition_index": 0, "threads": 1, "iterations": 23, - "real_time": 3.0489446352357451e+01, - "cpu_time": 3.0488431521739127e+01, + "real_time": 3.0438764263754305e+01, + "cpu_time": 3.0438311043478276e+01, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 791, - "real_time": 8.8596336458787306e-01, - "cpu_time": 8.8593867635903911e-01, + "iterations": 637, + "real_time": 1.1142698910490871e+00, + "cpu_time": 1.1142385604395602e+00, "time_unit": "ms" }, { @@ -87,9 +87,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 381, - "real_time": 1.8370149857572371e+00, - "cpu_time": 1.8369234225721787e+00, + "iterations": 375, + "real_time": 1.8523898224035900e+00, + "cpu_time": 1.8523744240000006e+00, "time_unit": "ms" }, { @@ -101,9 +101,9 @@ "repetitions": 1, 
"repetition_index": 0, "threads": 1, - "iterations": 251, - "real_time": 2.8152136926157065e+00, - "cpu_time": 2.8151388007968121e+00, + "iterations": 249, + "real_time": 2.7968978965617568e+00, + "cpu_time": 2.7968237389558248e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4625, - "real_time": 1.4954823497179393e-01, - "cpu_time": 1.4954442335135137e-01, + "iterations": 4716, + "real_time": 1.4841855141042753e-01, + "cpu_time": 1.4841650890585228e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2572, - "real_time": 2.7326759223248315e-01, - "cpu_time": 2.7326206259720059e-01, + "iterations": 2583, + "real_time": 2.7219368201838046e-01, + "cpu_time": 2.7218788850174197e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 102091, - "real_time": 6.9029587996913182e-03, - "cpu_time": 6.9026970937692798e-03, + "iterations": 100958, + "real_time": 6.9158962019867246e-03, + "cpu_time": 6.9157541452881457e-03, "time_unit": "ms" }, { @@ -157,9 +157,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 48431, - "real_time": 1.4439809400753574e-02, - "cpu_time": 1.4439482418285833e-02, + "iterations": 47295, + "real_time": 1.4739391594914103e-02, + "cpu_time": 1.4739279247277717e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2833, - "real_time": 2.3951560411401068e-01, - "cpu_time": 2.3949907130250636e-01, + "iterations": 1902, + "real_time": 3.5209773396279909e-01, + "cpu_time": 3.5206602313354285e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2959, - "real_time": 2.4086729588440925e-01, - "cpu_time": 2.4085952145995312e-01, + "iterations": 2372, + "real_time": 2.8494088541446205e-01, + "cpu_time": 
2.8493101180438485e-01, "time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1787, - "real_time": 3.8442675443287994e-01, - "cpu_time": 3.8440008337996617e-01, + "iterations": 1419, + "real_time": 5.2841257374836070e-01, + "cpu_time": 5.2840475828047939e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1757, - "real_time": 3.9290034837117571e-01, - "cpu_time": 3.9287781274900396e-01, + "iterations": 1000, + "real_time": 5.3209472447633743e-01, + "cpu_time": 5.3207190200000021e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 664, - "real_time": 1.0083697298653873e+00, - "cpu_time": 1.0083280978915672e+00, + "iterations": 726, + "real_time": 9.2559484841902395e-01, + "cpu_time": 9.2558814462809835e-01, "time_unit": "ms" }, { @@ -241,9 +241,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 650, - "real_time": 9.9526015611795282e-01, - "cpu_time": 9.9524376923076896e-01, + "iterations": 722, + "real_time": 9.3495421152861169e-01, + "cpu_time": 9.3492223961218790e-01, "time_unit": "ms" }, { @@ -255,9 +255,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4833, - "real_time": 1.4464196666362936e-01, - "cpu_time": 1.4463780633147102e-01, + "iterations": 4207, + "real_time": 1.6627396342446635e-01, + "cpu_time": 1.6627164606608016e-01, "time_unit": "ms" }, { @@ -269,9 +269,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3097, - "real_time": 2.2951874991444030e-01, - "cpu_time": 2.2951408104617452e-01, + "iterations": 2721, + "real_time": 2.5683478785980629e-01, + "cpu_time": 2.5682696435134150e-01, "time_unit": "ms" }, { @@ -283,9 +283,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3072, - "real_time": 2.2713451107847504e-01, - "cpu_time": 2.2713078320312519e-01, + 
"iterations": 2682, + "real_time": 2.5946399131699521e-01, + "cpu_time": 2.5946211446681705e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2632, - "real_time": 2.6567278452511978e-01, - "cpu_time": 2.6566629559270510e-01, + "iterations": 2392, + "real_time": 2.9280804172407426e-01, + "cpu_time": 2.9279882984949784e-01, "time_unit": "ms" }, { @@ -311,9 +311,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2644, - "real_time": 2.6454315795570205e-01, - "cpu_time": 2.6453323524962147e-01, + "iterations": 2392, + "real_time": 2.9102603851951486e-01, + "cpu_time": 2.9099642642140494e-01, "time_unit": "ms" }, { @@ -325,9 +325,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2613, - "real_time": 2.6846960489201538e-01, - "cpu_time": 2.6846243245311946e-01, + "iterations": 2374, + "real_time": 2.9361070696305303e-01, + "cpu_time": 2.9360214321819761e-01, "time_unit": "ms" }, { @@ -339,9 +339,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4934, - "real_time": 1.4218385246392440e-01, - "cpu_time": 1.4217726043777826e-01, + "iterations": 4301, + "real_time": 1.6287719699649639e-01, + "cpu_time": 1.6287602022785452e-01, "time_unit": "ms" } ] diff --git a/test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log b/test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log index f1e06fb2..09a24a36 100644 --- a/test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log +++ b/test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log @@ -1,4 +1,4 @@ -2025-09-07T13:13:38+00:00 +2025-09-07T14:29:50+00:00 Running ./bin/image-processing-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,33 +6,33 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 
Unified 30720 KiB (x1) -Load Average: 2.88, 3.28, 5.00 +Load Average: 3.02, 3.32, 4.28 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. -------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------------------- -Eigen_Convolve2D/1 12.2 ms 12.2 ms 57 -MLIR_Conv2D/1 30.5 ms 30.5 ms 23 -Buddy_Conv2D/1 0.886 ms 0.886 ms 791 -Buddy_Corr2D_Constant_Padding/1 1.84 ms 1.84 ms 381 -OpenCV_Filter2D_Constant_Padding/1 2.82 ms 2.82 ms 251 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.150 ms 0.150 ms 4625 -Buddy_Resize2D_Bilinear_Interpolation/1 0.273 ms 0.273 ms 2572 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 102091 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 48431 -Buddy_Erosion2D_Constant_Padding/1 0.240 ms 0.239 ms 2833 -Buddy_Dilation2D_Constant_Padding/1 0.241 ms 0.241 ms 2959 -Buddy_Opening2D_Constant_Padding/1 0.384 ms 0.384 ms 1787 -Buddy_Closing2D_Constant_Padding/1 0.393 ms 0.393 ms 1757 -Buddy_TopHat2D_Constant_Padding/1 1.01 ms 1.01 ms 664 -Buddy_BottomHat2D_Constant_Padding/1 0.995 ms 0.995 ms 650 -OpenCV_Erode2D_Constant_Padding/1 0.145 ms 0.145 ms 4833 -OpenCV_Opening2D_Constant_Padding/1 0.230 ms 0.230 ms 3097 -OpenCV_Closing2D_Constant_Padding/1 0.227 ms 0.227 ms 3072 -OpenCV_TopHat2D_Constant_Padding/1 0.266 ms 0.266 ms 2632 -OpenCV_BottomHat2D_Constant_Padding/1 0.265 ms 0.265 ms 2644 -OpenCV_MorphGrad2D_Constant_Padding/1 0.268 ms 0.268 ms 2613 -OpenCV_Dilate2D_Constant_Padding/1 0.142 ms 0.142 ms 4934 +Eigen_Convolve2D/1 11.9 ms 11.9 ms 52 +MLIR_Conv2D/1 30.4 ms 30.4 ms 23 +Buddy_Conv2D/1 1.11 ms 1.11 ms 637 +Buddy_Corr2D_Constant_Padding/1 1.85 ms 1.85 ms 375 +OpenCV_Filter2D_Constant_Padding/1 2.80 ms 2.80 ms 249 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.148 ms 0.148 ms 4716 
+Buddy_Resize2D_Bilinear_Interpolation/1 0.272 ms 0.272 ms 2583 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 100958 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47295 +Buddy_Erosion2D_Constant_Padding/1 0.352 ms 0.352 ms 1902 +Buddy_Dilation2D_Constant_Padding/1 0.285 ms 0.285 ms 2372 +Buddy_Opening2D_Constant_Padding/1 0.528 ms 0.528 ms 1419 +Buddy_Closing2D_Constant_Padding/1 0.532 ms 0.532 ms 1000 +Buddy_TopHat2D_Constant_Padding/1 0.926 ms 0.926 ms 726 +Buddy_BottomHat2D_Constant_Padding/1 0.935 ms 0.935 ms 722 +OpenCV_Erode2D_Constant_Padding/1 0.166 ms 0.166 ms 4207 +OpenCV_Opening2D_Constant_Padding/1 0.257 ms 0.257 ms 2721 +OpenCV_Closing2D_Constant_Padding/1 0.259 ms 0.259 ms 2682 +OpenCV_TopHat2D_Constant_Padding/1 0.293 ms 0.293 ms 2392 +OpenCV_BottomHat2D_Constant_Padding/1 0.291 ms 0.291 ms 2392 +OpenCV_MorphGrad2D_Constant_Padding/1 0.294 ms 0.294 ms 2374 +OpenCV_Dilate2D_Constant_Padding/1 0.163 ms 0.163 ms 4301 Saved PNG file. Saved PNG file. Saved PNG file. 
diff --git a/test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json index 91f4ace5..07ef45e8 100644 --- a/test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json +++ b/test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-09-07T13:14:03+00:00", + "date": "2025-09-07T14:30:14+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [2.92334,3.25537,4.94238], + "load_avg": [3.08984,3.31445,4.24609], "library_build_type": "release" }, "benchmarks": [ @@ -46,8 +46,8 @@ "repetition_index": 0, "threads": 1, "iterations": 58, - "real_time": 1.2106177300728600e+01, - "cpu_time": 1.2105513637931036e+01, + "real_time": 1.1998113887063388e+01, + "cpu_time": 1.1997172689655171e+01, "time_unit": "ms" }, { @@ -60,8 +60,8 @@ "repetition_index": 0, "threads": 1, "iterations": 23, - "real_time": 3.0653080862501394e+01, - "cpu_time": 3.0651176565217387e+01, + "real_time": 3.0430977273246516e+01, + "cpu_time": 3.0430230913043474e+01, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 682, - "real_time": 1.0277371405681208e+00, - "cpu_time": 1.0276775366568915e+00, + "iterations": 694, + "real_time": 9.3945750284950735e-01, + "cpu_time": 9.3939238472622510e-01, "time_unit": "ms" }, { @@ -87,9 +87,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 369, - "real_time": 1.8722422239257068e+00, - "cpu_time": 1.8721983197831980e+00, + "iterations": 379, + "real_time": 1.8339350839246231e+00, + "cpu_time": 1.8338942585751978e+00, "time_unit": "ms" }, { @@ -102,8 +102,8 @@ "repetition_index": 0, "threads": 1, "iterations": 249, - 
"real_time": 2.8038945303384559e+00, - "cpu_time": 2.8037449116465871e+00, + "real_time": 2.7970489727924148e+00, + "cpu_time": 2.7968971004016070e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4676, - "real_time": 1.4948565831568836e-01, - "cpu_time": 1.4947427502138574e-01, + "iterations": 4735, + "real_time": 1.4771520226029433e-01, + "cpu_time": 1.4771250242872222e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2573, - "real_time": 2.7347582233494189e-01, - "cpu_time": 2.7346715157403839e-01, + "iterations": 2581, + "real_time": 2.7436091312370020e-01, + "cpu_time": 2.7433653428903537e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 101945, - "real_time": 6.9013301463456663e-03, - "cpu_time": 6.9010407082250192e-03, + "iterations": 101032, + "real_time": 6.9237584291694564e-03, + "cpu_time": 6.9233851156069301e-03, "time_unit": "ms" }, { @@ -157,9 +157,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 48219, - "real_time": 1.4578375478092348e-02, - "cpu_time": 1.4577905348514077e-02, + "iterations": 47897, + "real_time": 1.4611094101790878e-02, + "cpu_time": 1.4609948556276994e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2797, - "real_time": 2.4615478038276398e-01, - "cpu_time": 2.4613685055416532e-01, + "iterations": 2315, + "real_time": 3.1752810043083668e-01, + "cpu_time": 3.1751235248380194e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2766, - "real_time": 2.5387134588850813e-01, - "cpu_time": 2.5385709797541600e-01, + "iterations": 1991, + "real_time": 3.0030406326699172e-01, + "cpu_time": 3.0026843294826744e-01, "time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, 
"repetition_index": 0, "threads": 1, - "iterations": 1746, - "real_time": 3.8558237450912097e-01, - "cpu_time": 3.8556492955326516e-01, + "iterations": 1162, + "real_time": 5.2669528301119184e-01, + "cpu_time": 5.2667063166953465e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1774, - "real_time": 4.0888474475100989e-01, - "cpu_time": 4.0886437429537814e-01, + "iterations": 1147, + "real_time": 5.8120846059806675e-01, + "cpu_time": 5.8116856669572725e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 683, - "real_time": 1.0146626910824728e+00, - "cpu_time": 1.0145995739385083e+00, + "iterations": 742, + "real_time": 9.3807371438674203e-01, + "cpu_time": 9.3802853369272166e-01, "time_unit": "ms" }, { @@ -241,9 +241,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 678, - "real_time": 1.0159070435966362e+00, - "cpu_time": 1.0158491327433619e+00, + "iterations": 731, + "real_time": 9.1354743094679103e-01, + "cpu_time": 9.1349827770177838e-01, "time_unit": "ms" }, { @@ -255,9 +255,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4842, - "real_time": 1.4470969255627408e-01, - "cpu_time": 1.4470714684014824e-01, + "iterations": 4901, + "real_time": 1.4285111738647449e-01, + "cpu_time": 1.4284396123240140e-01, "time_unit": "ms" }, { @@ -269,9 +269,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3067, - "real_time": 2.2680669984096108e-01, - "cpu_time": 2.2680151613954949e-01, + "iterations": 2990, + "real_time": 2.3436977786563312e-01, + "cpu_time": 2.3436707424749126e-01, "time_unit": "ms" }, { @@ -283,9 +283,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3056, - "real_time": 2.2937234427721401e-01, - "cpu_time": 2.2935791001308917e-01, + "iterations": 2977, + "real_time": 2.3498714375624050e-01, + "cpu_time": 
2.3497187302653738e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2685, - "real_time": 2.6044210824886516e-01, - "cpu_time": 2.6042399217877105e-01, + "iterations": 2568, + "real_time": 2.7303778743400381e-01, + "cpu_time": 2.7303231347352053e-01, "time_unit": "ms" }, { @@ -311,9 +311,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2731, - "real_time": 2.6085500406590367e-01, - "cpu_time": 2.6084955108019009e-01, + "iterations": 2572, + "real_time": 2.6648068832433430e-01, + "cpu_time": 2.6646258125972005e-01, "time_unit": "ms" }, { @@ -325,9 +325,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2609, - "real_time": 2.6988330050509018e-01, - "cpu_time": 2.6987879264085829e-01, + "iterations": 2624, + "real_time": 2.6757532884026081e-01, + "cpu_time": 2.6757154878048883e-01, "time_unit": "ms" }, { @@ -339,9 +339,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4928, - "real_time": 1.4262092556780229e-01, - "cpu_time": 1.4261844784902611e-01, + "iterations": 4938, + "real_time": 1.4144324261126320e-01, + "cpu_time": 1.4143231227217509e-01, "time_unit": "ms" } ] diff --git a/test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log b/test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log index 4e0b0dbe..e95c5e72 100644 --- a/test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log +++ b/test_result/imageprocessing/AVX2_YuTu_logKernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log @@ -1,4 +1,4 @@ -2025-09-07T13:14:03+00:00 +2025-09-07T14:30:14+00:00 Running ./bin/image-processing-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,41 +6,33 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 2.92, 3.26, 4.94 +Load Average: 3.09, 
3.31, 4.25 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. -------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------------------- -Eigen_Convolve2D/1 12.1 ms 12.1 ms 58 -MLIR_Conv2D/1 30.7 ms 30.7 ms 23 -Buddy_Conv2D/1 1.03 ms 1.03 ms 682 -Buddy_Corr2D_Constant_Padding/1 1.87 ms 1.87 ms 369 +Eigen_Convolve2D/1 12.0 ms 12.0 ms 58 +MLIR_Conv2D/1 30.4 ms 30.4 ms 23 +Buddy_Conv2D/1 0.939 ms 0.939 ms 694 +Buddy_Corr2D_Constant_Padding/1 1.83 ms 1.83 ms 379 OpenCV_Filter2D_Constant_Padding/1 2.80 ms 2.80 ms 249 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.149 ms 0.149 ms 4676 -Buddy_Resize2D_Bilinear_Interpolation/1 0.273 ms 0.273 ms 2573 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 101945 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 48219 -Buddy_Erosion2D_Constant_Padding/1 0.246 ms 0.246 ms 2797 -Buddy_Dilation2D_Constant_Padding/1 0.254 ms 0.254 ms 2766 -Buddy_Opening2D_Constant_Padding/1 0.386 ms 0.386 ms 1746 -Buddy_Closing2D_Constant_Padding/1 0.409 ms 0.409 ms 1774 -Buddy_TopHat2D_Constant_Padding/1 1.01 ms 1.01 ms 683 -Buddy_BottomHat2D_Constant_Padding/1 1.02 ms 1.02 ms 678 -OpenCV_Erode2D_Constant_Padding/1 0.145 ms 0.145 ms 4842 -OpenCV_Opening2D_Constant_Padding/1 0.227 ms 0.227 ms 3067 -OpenCV_Closing2D_Constant_Padding/1 0.229 ms 0.229 ms 3056 -OpenCV_TopHat2D_Constant_Padding/1 0.260 ms 0.260 ms 2685 -OpenCV_BottomHat2D_Constant_Padding/1 0.261 ms 0.261 ms 2731 -OpenCV_MorphGrad2D_Constant_Padding/1 0.270 ms 0.270 ms 2609 -OpenCV_Dilate2D_Constant_Padding/1 0.143 ms 0.143 ms 4928 -Saved PNG file. -Saved PNG file. -Saved PNG file. -Saved PNG file. -Saved PNG file. -Saved PNG file. -Saved PNG file. -Saved PNG file. 
+Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.148 ms 0.148 ms 4735 +Buddy_Resize2D_Bilinear_Interpolation/1 0.274 ms 0.274 ms 2581 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 101032 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47897 +Buddy_Erosion2D_Constant_Padding/1 0.318 ms 0.318 ms 2315 +Buddy_Dilation2D_Constant_Padding/1 0.300 ms 0.300 ms 1991 +Buddy_Opening2D_Constant_Padding/1 0.527 ms 0.527 ms 1162 +Buddy_Closing2D_Constant_Padding/1 0.581 ms 0.581 ms 1147 +Buddy_TopHat2D_Constant_Padding/1 0.938 ms 0.938 ms 742 +Buddy_BottomHat2D_Constant_Padding/1 0.914 ms 0.913 ms 731 +OpenCV_Erode2D_Constant_Padding/1 0.143 ms 0.143 ms 4901 +OpenCV_Opening2D_Constant_Padding/1 0.234 ms 0.234 ms 2990 +OpenCV_Closing2D_Constant_Padding/1 0.235 ms 0.235 ms 2977 +OpenCV_TopHat2D_Constant_Padding/1 0.273 ms 0.273 ms 2568 +OpenCV_BottomHat2D_Constant_Padding/1 0.266 ms 0.266 ms 2572 +OpenCV_MorphGrad2D_Constant_Padding/1 0.268 ms 0.268 ms 2624 +OpenCV_Dilate2D_Constant_Padding/1 0.141 ms 0.141 ms 4938 Saved PNG file. Saved PNG file. Saved PNG file. @@ -54,3 +46,12 @@ Saved PNG file. Saved PNG file. Saved PNG file. Saved PNG file. +ERROR: Can't save PNG file. +ERROR: Can't save PNG file. +ERROR: Can't save PNG file. +ERROR: Can't save PNG file. +ERROR: Can't save PNG file. +ERROR: Can't save PNG file. +ERROR: Can't save PNG file. +Exception converting image to PNG format. +ERROR: Can't save PNG file. 
diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json index e0d17d98..cd4dab75 100644 --- a/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json +++ b/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json @@ -245,7 +245,8 @@ "real_time": 9.4657656001417256e-01, "cpu_time": 9.4650936842105216e-01, "time_unit": "ms" - } { + }, + { "name": "OpenCV_Erode2D_Constant_Padding/1", "family_index": 15, "per_family_instance_index": 0, @@ -254,9 +255,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4117, - "real_time": 1.6993736472267648e-01, - "cpu_time": 1.6993461015302383e-01, + "iterations": 4886, + "real_time": 1.4358902494169579e-01, + "cpu_time": 1.4358145968072070e-01, "time_unit": "ms" }, { @@ -268,8 +269,80 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2816, - "real_time": 2.4825724275698038e-01, - "cpu_time": 2.4825158025568175e-01, + "iterations": 2978, + "real_time": 2.3539010072571068e-01, + "cpu_time": 2.3538351511081329e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Closing2D_Constant_Padding/1", + "family_index": 17, + "per_family_instance_index": 0, + "run_name": "OpenCV_Closing2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2979, + "real_time": 2.3258523218341223e-01, + "cpu_time": 2.3257072171869730e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_TopHat2D_Constant_Padding/1", + "family_index": 18, + "per_family_instance_index": 0, + "run_name": "OpenCV_TopHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2603, + "real_time": 2.6823798372706131e-01, + "cpu_time": 
2.6822945024971095e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_BottomHat2D_Constant_Padding/1", + "family_index": 19, + "per_family_instance_index": 0, + "run_name": "OpenCV_BottomHat2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2574, + "real_time": 2.7231369876731898e-01, + "cpu_time": 2.7229450038850062e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "family_index": 20, + "per_family_instance_index": 0, + "run_name": "OpenCV_MorphGrad2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2619, + "real_time": 2.6665248173072437e-01, + "cpu_time": 2.6664444520809394e-01, + "time_unit": "ms" + }, + { + "name": "OpenCV_Dilate2D_Constant_Padding/1", + "family_index": 21, + "per_family_instance_index": 0, + "run_name": "OpenCV_Dilate2D_Constant_Padding/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4960, + "real_time": 1.4122858448254486e-01, + "cpu_time": 1.4122089999999995e-01, "time_unit": "ms" - } \ No newline at end of file + } + ] +} diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log b/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log index 7befb623..6504a841 100644 --- a/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log +++ b/test_result/imageprocessing/AVX2_YuTu_sobel5x5KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log @@ -26,5 +26,31 @@ Buddy_Opening2D_Constant_Padding/1 0.582 ms 0.582 ms Buddy_Closing2D_Constant_Padding/1 0.569 ms 0.569 ms 1171 Buddy_TopHat2D_Constant_Padding/1 0.953 ms 0.953 ms 737 Buddy_BottomHat2D_Constant_Padding/1 0.947 ms 0.947 ms 722 -OpenCV_Erode2D_Constant_Padding/1 0.170 ms 0.170 ms 4117 
-OpenCV_Opening2D_Constant_Padding/1 0.248 ms 0.248 ms 2816 +OpenCV_Erode2D_Constant_Padding/1 0.144 ms 0.144 ms 4886 +OpenCV_Opening2D_Constant_Padding/1 0.235 ms 0.235 ms 2978 +OpenCV_Closing2D_Constant_Padding/1 0.233 ms 0.233 ms 2979 +OpenCV_TopHat2D_Constant_Padding/1 0.268 ms 0.268 ms 2603 +OpenCV_BottomHat2D_Constant_Padding/1 0.272 ms 0.272 ms 2574 +OpenCV_MorphGrad2D_Constant_Padding/1 0.267 ms 0.267 ms 2619 +OpenCV_Dilate2D_Constant_Padding/1 0.141 ms 0.141 ms 4960 +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. +Saved PNG file. diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json index ab377824..9cd4797c 100644 --- a/test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json +++ b/test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-09-07T13:11:10+00:00", + "date": "2025-09-07T14:27:26+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [2.49512,3.48877,5.34766], + "load_avg": [3.20947,3.53711,4.49951], "library_build_type": "release" }, "benchmarks": [ @@ -46,8 +46,8 @@ "repetition_index": 0, "threads": 1, "iterations": 31, - "real_time": 2.2698959636111415e+01, - "cpu_time": 2.2698047322580649e+01, + "real_time": 2.2631176176571078e+01, + "cpu_time": 2.2627850516129033e+01, "time_unit": "ms" }, { @@ -60,8 +60,8 @@ "repetition_index": 0, "threads": 1, 
"iterations": 10, - "real_time": 6.9128799811005592e+01, - "cpu_time": 6.9123686800000002e+01, + "real_time": 7.0389824360609055e+01, + "cpu_time": 7.0386717899999994e+01, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 303, - "real_time": 2.3153245744138662e+00, - "cpu_time": 2.3151734257425751e+00, + "iterations": 291, + "real_time": 2.3570958640157560e+00, + "cpu_time": 2.3570359209621992e+00, "time_unit": "ms" }, { @@ -88,8 +88,8 @@ "repetition_index": 0, "threads": 1, "iterations": 145, - "real_time": 4.8356864472915388e+00, - "cpu_time": 4.8354401310344803e+00, + "real_time": 4.8382232415265047e+00, + "cpu_time": 4.8375873103448299e+00, "time_unit": "ms" }, { @@ -102,8 +102,8 @@ "repetition_index": 0, "threads": 1, "iterations": 78, - "real_time": 9.0294344207415218e+00, - "cpu_time": 9.0290102307692237e+00, + "real_time": 8.9898192538664894e+00, + "cpu_time": 8.9885258333333269e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4647, - "real_time": 1.4970531277895641e-01, - "cpu_time": 1.4969578330105443e-01, + "iterations": 4732, + "real_time": 1.4900763708836731e-01, + "cpu_time": 1.4898793300929827e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2568, - "real_time": 2.7367153358747281e-01, - "cpu_time": 2.7365799805295959e-01, + "iterations": 2567, + "real_time": 2.7684785533053519e-01, + "cpu_time": 2.7677605103233366e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 101049, - "real_time": 6.8725437557921798e-03, - "cpu_time": 6.8721282645053334e-03, + "iterations": 100753, + "real_time": 7.1818906740909278e-03, + "cpu_time": 7.1817220132403070e-03, "time_unit": "ms" }, { @@ -157,9 +157,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 48490, - "real_time": 
1.6256974490309873e-02, - "cpu_time": 1.6256173540936274e-02, + "iterations": 47612, + "real_time": 1.4661686260374937e-02, + "cpu_time": 1.4661306267327555e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2701, - "real_time": 2.5604368194038452e-01, - "cpu_time": 2.5602443983709744e-01, + "iterations": 2204, + "real_time": 3.2491934825471869e-01, + "cpu_time": 3.2490066288566188e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2749, - "real_time": 2.5215670845863297e-01, - "cpu_time": 2.5215243179337943e-01, + "iterations": 2109, + "real_time": 3.3516474085828601e-01, + "cpu_time": 3.3511585395922289e-01, "time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1779, - "real_time": 4.4195852156031190e-01, - "cpu_time": 4.4191965486228230e-01, + "iterations": 1000, + "real_time": 6.0891722515225410e-01, + "cpu_time": 6.0877587399999911e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1747, - "real_time": 4.3807942345610468e-01, - "cpu_time": 4.3805371150543754e-01, + "iterations": 1056, + "real_time": 5.6265958471957478e-01, + "cpu_time": 5.6263823579545469e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 686, - "real_time": 1.0212933530612869e+00, - "cpu_time": 1.0212425087463568e+00, + "iterations": 665, + "real_time": 1.0144153091692387e+00, + "cpu_time": 1.0142662390977437e+00, "time_unit": "ms" }, { @@ -241,9 +241,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 678, - "real_time": 1.0082492423532283e+00, - "cpu_time": 1.0082195176991142e+00, + "iterations": 687, + "real_time": 9.8415326972368677e-01, + "cpu_time": 9.8413417176128126e-01, "time_unit": "ms" }, { @@ -255,9 +255,9 @@ "repetitions": 1, 
"repetition_index": 0, "threads": 1, - "iterations": 4907, - "real_time": 1.4108774486252751e-01, - "cpu_time": 1.4108310148767086e-01, + "iterations": 4827, + "real_time": 1.4460108045936990e-01, + "cpu_time": 1.4459874000414299e-01, "time_unit": "ms" }, { @@ -269,9 +269,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3151, - "real_time": 2.2556108071742759e-01, - "cpu_time": 2.2554901872421484e-01, + "iterations": 3090, + "real_time": 2.2893413467314636e-01, + "cpu_time": 2.2890716181229737e-01, "time_unit": "ms" }, { @@ -283,9 +283,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3125, - "real_time": 2.2625566363334657e-01, - "cpu_time": 2.2624866016000056e-01, + "iterations": 3038, + "real_time": 2.3062362374248593e-01, + "cpu_time": 2.3059413199473325e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2563, - "real_time": 2.6822397270985893e-01, - "cpu_time": 2.6821861724541574e-01, + "iterations": 2627, + "real_time": 2.6741435983174650e-01, + "cpu_time": 2.6737748953178492e-01, "time_unit": "ms" }, { @@ -311,9 +311,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2586, - "real_time": 2.6990216274364737e-01, - "cpu_time": 2.6988920069605538e-01, + "iterations": 2614, + "real_time": 2.6675470190460482e-01, + "cpu_time": 2.6671898355011386e-01, "time_unit": "ms" }, { @@ -325,9 +325,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2636, - "real_time": 2.6586177378712006e-01, - "cpu_time": 2.6585147420333838e-01, + "iterations": 2680, + "real_time": 2.6224578188648867e-01, + "cpu_time": 2.6221102089552206e-01, "time_unit": "ms" }, { @@ -339,9 +339,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4852, - "real_time": 1.4311890367354033e-01, - "cpu_time": 1.4311650906842524e-01, + "iterations": 4958, + "real_time": 1.4103486151188407e-01, + "cpu_time": 
1.4101694735780548e-01, "time_unit": "ms" } ] diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log b/test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log index 70ceea58..e2188404 100644 --- a/test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log +++ b/test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log @@ -1,4 +1,4 @@ -2025-09-07T13:11:10+00:00 +2025-09-07T14:27:26+00:00 Running ./bin/image-processing-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,33 +6,33 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 2.50, 3.49, 5.35 +Load Average: 3.21, 3.54, 4.50 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. -------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------------------- -Eigen_Convolve2D/1 22.7 ms 22.7 ms 31 -MLIR_Conv2D/1 69.1 ms 69.1 ms 10 -Buddy_Conv2D/1 2.32 ms 2.32 ms 303 +Eigen_Convolve2D/1 22.6 ms 22.6 ms 31 +MLIR_Conv2D/1 70.4 ms 70.4 ms 10 +Buddy_Conv2D/1 2.36 ms 2.36 ms 291 Buddy_Corr2D_Constant_Padding/1 4.84 ms 4.84 ms 145 -OpenCV_Filter2D_Constant_Padding/1 9.03 ms 9.03 ms 78 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.150 ms 0.150 ms 4647 -Buddy_Resize2D_Bilinear_Interpolation/1 0.274 ms 0.274 ms 2568 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 101049 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.016 ms 0.016 ms 48490 -Buddy_Erosion2D_Constant_Padding/1 0.256 ms 0.256 ms 2701 -Buddy_Dilation2D_Constant_Padding/1 0.252 ms 0.252 ms 2749 -Buddy_Opening2D_Constant_Padding/1 0.442 ms 0.442 ms 1779 -Buddy_Closing2D_Constant_Padding/1 
0.438 ms 0.438 ms 1747 -Buddy_TopHat2D_Constant_Padding/1 1.02 ms 1.02 ms 686 -Buddy_BottomHat2D_Constant_Padding/1 1.01 ms 1.01 ms 678 -OpenCV_Erode2D_Constant_Padding/1 0.141 ms 0.141 ms 4907 -OpenCV_Opening2D_Constant_Padding/1 0.226 ms 0.226 ms 3151 -OpenCV_Closing2D_Constant_Padding/1 0.226 ms 0.226 ms 3125 -OpenCV_TopHat2D_Constant_Padding/1 0.268 ms 0.268 ms 2563 -OpenCV_BottomHat2D_Constant_Padding/1 0.270 ms 0.270 ms 2586 -OpenCV_MorphGrad2D_Constant_Padding/1 0.266 ms 0.266 ms 2636 -OpenCV_Dilate2D_Constant_Padding/1 0.143 ms 0.143 ms 4852 +OpenCV_Filter2D_Constant_Padding/1 8.99 ms 8.99 ms 78 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.149 ms 0.149 ms 4732 +Buddy_Resize2D_Bilinear_Interpolation/1 0.277 ms 0.277 ms 2567 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 100753 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47612 +Buddy_Erosion2D_Constant_Padding/1 0.325 ms 0.325 ms 2204 +Buddy_Dilation2D_Constant_Padding/1 0.335 ms 0.335 ms 2109 +Buddy_Opening2D_Constant_Padding/1 0.609 ms 0.609 ms 1000 +Buddy_Closing2D_Constant_Padding/1 0.563 ms 0.563 ms 1056 +Buddy_TopHat2D_Constant_Padding/1 1.01 ms 1.01 ms 665 +Buddy_BottomHat2D_Constant_Padding/1 0.984 ms 0.984 ms 687 +OpenCV_Erode2D_Constant_Padding/1 0.145 ms 0.145 ms 4827 +OpenCV_Opening2D_Constant_Padding/1 0.229 ms 0.229 ms 3090 +OpenCV_Closing2D_Constant_Padding/1 0.231 ms 0.231 ms 3038 +OpenCV_TopHat2D_Constant_Padding/1 0.267 ms 0.267 ms 2627 +OpenCV_BottomHat2D_Constant_Padding/1 0.267 ms 0.267 ms 2614 +OpenCV_MorphGrad2D_Constant_Padding/1 0.262 ms 0.262 ms 2680 +OpenCV_Dilate2D_Constant_Padding/1 0.141 ms 0.141 ms 4958 Saved PNG file. Saved PNG file. Saved PNG file. 
diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json index 71ca4698..1dbbecce 100644 --- a/test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json +++ b/test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-09-07T13:11:35+00:00", + "date": "2025-09-07T14:27:51+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [2.59424,3.43213,5.27881], + "load_avg": [3.13721,3.49268,4.45801], "library_build_type": "release" }, "benchmarks": [ @@ -46,8 +46,8 @@ "repetition_index": 0, "threads": 1, "iterations": 31, - "real_time": 2.2505127254032320e+01, - "cpu_time": 2.2503875387096770e+01, + "real_time": 2.2485823640900275e+01, + "cpu_time": 2.2485474354838711e+01, "time_unit": "ms" }, { @@ -60,8 +60,8 @@ "repetition_index": 0, "threads": 1, "iterations": 10, - "real_time": 6.9353722408413887e+01, - "cpu_time": 6.9350002200000006e+01, + "real_time": 7.0325020700693130e+01, + "cpu_time": 7.0323628100000008e+01, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 294, - "real_time": 2.3809186860817628e+00, - "cpu_time": 2.3807380238095233e+00, + "iterations": 304, + "real_time": 2.3227319930140911e+00, + "cpu_time": 2.3226553092105280e+00, "time_unit": "ms" }, { @@ -87,9 +87,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 145, - "real_time": 4.8076913788400848e+00, - "cpu_time": 4.8074658482758617e+00, + "iterations": 144, + "real_time": 4.8592611629929809e+00, + "cpu_time": 4.8591047083333336e+00, "time_unit": "ms" }, { @@ -101,9 +101,9 @@ "repetitions": 1, "repetition_index": 
0, "threads": 1, - "iterations": 77, - "real_time": 9.0552678742966091e+00, - "cpu_time": 9.0549782467532474e+00, + "iterations": 78, + "real_time": 8.9812785004958133e+00, + "cpu_time": 8.9809005512820512e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4641, - "real_time": 1.5078150530591963e-01, - "cpu_time": 1.5077307412195642e-01, + "iterations": 4740, + "real_time": 1.4794336765501570e-01, + "cpu_time": 1.4793731962025314e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2561, - "real_time": 2.7482259155995487e-01, - "cpu_time": 2.7480107887543898e-01, + "iterations": 2579, + "real_time": 2.7309454177275516e-01, + "cpu_time": 2.7308133927879030e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 102307, - "real_time": 6.8564598795962314e-03, - "cpu_time": 6.8562642438933811e-03, + "iterations": 101192, + "real_time": 6.9005510118396131e-03, + "cpu_time": 6.9002890544707105e-03, "time_unit": "ms" }, { @@ -157,9 +157,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 48525, - "real_time": 1.4494471271157203e-02, - "cpu_time": 1.4493903987635266e-02, + "iterations": 47570, + "real_time": 1.4686870232258922e-02, + "cpu_time": 1.4686363212108497e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2896, - "real_time": 2.4304971181226698e-01, - "cpu_time": 2.4304034495856303e-01, + "iterations": 2503, + "real_time": 3.0775897315012374e-01, + "cpu_time": 3.0774776468238130e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2854, - "real_time": 2.4745649362495920e-01, - "cpu_time": 2.4743058479327271e-01, + "iterations": 2313, + "real_time": 2.9183508988535872e-01, + "cpu_time": 2.9181160397751793e-01, 
"time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1734, - "real_time": 4.0102887129563636e-01, - "cpu_time": 4.0101697808535247e-01, + "iterations": 1118, + "real_time": 5.0250999962707754e-01, + "cpu_time": 5.0250005456171720e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1783, - "real_time": 4.0142091886420900e-01, - "cpu_time": 4.0141442512619180e-01, + "iterations": 1097, + "real_time": 4.7140384098020810e-01, + "cpu_time": 4.7139598450319076e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 652, - "real_time": 1.0057697529715994e+00, - "cpu_time": 1.0057539616564428e+00, + "iterations": 739, + "real_time": 9.4388246395107211e-01, + "cpu_time": 9.4387578755074519e-01, "time_unit": "ms" }, { @@ -241,9 +241,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 668, - "real_time": 1.0270904693864062e+00, - "cpu_time": 1.0270344431137737e+00, + "iterations": 700, + "real_time": 9.3932237476110458e-01, + "cpu_time": 9.3930383428571462e-01, "time_unit": "ms" }, { @@ -255,9 +255,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4769, - "real_time": 1.5437182739711353e-01, - "cpu_time": 1.5436570203396952e-01, + "iterations": 4835, + "real_time": 1.4465527980749215e-01, + "cpu_time": 1.4465337456049635e-01, "time_unit": "ms" }, { @@ -269,9 +269,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2758, - "real_time": 2.2677743187453800e-01, - "cpu_time": 2.2676782378535207e-01, + "iterations": 3096, + "real_time": 2.3360328763261323e-01, + "cpu_time": 2.3359945413436684e-01, "time_unit": "ms" }, { @@ -283,9 +283,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3085, - "real_time": 2.2627895447384788e-01, - "cpu_time": 2.2626945510534760e-01, + "iterations": 3103, + 
"real_time": 2.2638490458976213e-01, + "cpu_time": 2.2637974830808869e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2647, - "real_time": 2.6378509263790550e-01, - "cpu_time": 2.6376938307517955e-01, + "iterations": 2686, + "real_time": 2.6290406343303147e-01, + "cpu_time": 2.6289967795979169e-01, "time_unit": "ms" }, { @@ -311,9 +311,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2665, - "real_time": 2.6252820920094316e-01, - "cpu_time": 2.6251876210131225e-01, + "iterations": 2642, + "real_time": 2.6402394037008464e-01, + "cpu_time": 2.6402097728993146e-01, "time_unit": "ms" }, { @@ -325,9 +325,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2628, - "real_time": 2.6982235956137585e-01, - "cpu_time": 2.6981582077625482e-01, + "iterations": 2681, + "real_time": 2.6178075412634993e-01, + "cpu_time": 2.6177909585975329e-01, "time_unit": "ms" }, { @@ -339,9 +339,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4915, - "real_time": 1.4328015390271936e-01, - "cpu_time": 1.4327507243133245e-01, + "iterations": 4948, + "real_time": 1.4226491018684043e-01, + "cpu_time": 1.4226395371867465e-01, "time_unit": "ms" } ] diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log b/test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log index e226cf28..50b485e5 100644 --- a/test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log +++ b/test_result/imageprocessing/AVX2_YuTu_sobel7x7KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log @@ -1,4 +1,4 @@ -2025-09-07T13:11:35+00:00 +2025-09-07T14:27:51+00:00 Running ./bin/image-processing-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,33 +6,33 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 
Unified 30720 KiB (x1) -Load Average: 2.59, 3.43, 5.28 +Load Average: 3.14, 3.49, 4.46 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. -------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------------------- Eigen_Convolve2D/1 22.5 ms 22.5 ms 31 -MLIR_Conv2D/1 69.4 ms 69.4 ms 10 -Buddy_Conv2D/1 2.38 ms 2.38 ms 294 -Buddy_Corr2D_Constant_Padding/1 4.81 ms 4.81 ms 145 -OpenCV_Filter2D_Constant_Padding/1 9.06 ms 9.05 ms 77 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.151 ms 0.151 ms 4641 -Buddy_Resize2D_Bilinear_Interpolation/1 0.275 ms 0.275 ms 2561 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 102307 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.014 ms 0.014 ms 48525 -Buddy_Erosion2D_Constant_Padding/1 0.243 ms 0.243 ms 2896 -Buddy_Dilation2D_Constant_Padding/1 0.247 ms 0.247 ms 2854 -Buddy_Opening2D_Constant_Padding/1 0.401 ms 0.401 ms 1734 -Buddy_Closing2D_Constant_Padding/1 0.401 ms 0.401 ms 1783 -Buddy_TopHat2D_Constant_Padding/1 1.01 ms 1.01 ms 652 -Buddy_BottomHat2D_Constant_Padding/1 1.03 ms 1.03 ms 668 -OpenCV_Erode2D_Constant_Padding/1 0.154 ms 0.154 ms 4769 -OpenCV_Opening2D_Constant_Padding/1 0.227 ms 0.227 ms 2758 -OpenCV_Closing2D_Constant_Padding/1 0.226 ms 0.226 ms 3085 -OpenCV_TopHat2D_Constant_Padding/1 0.264 ms 0.264 ms 2647 -OpenCV_BottomHat2D_Constant_Padding/1 0.263 ms 0.263 ms 2665 -OpenCV_MorphGrad2D_Constant_Padding/1 0.270 ms 0.270 ms 2628 -OpenCV_Dilate2D_Constant_Padding/1 0.143 ms 0.143 ms 4915 +MLIR_Conv2D/1 70.3 ms 70.3 ms 10 +Buddy_Conv2D/1 2.32 ms 2.32 ms 304 +Buddy_Corr2D_Constant_Padding/1 4.86 ms 4.86 ms 144 +OpenCV_Filter2D_Constant_Padding/1 8.98 ms 8.98 ms 78 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.148 ms 0.148 ms 4740 +Buddy_Resize2D_Bilinear_Interpolation/1 0.273 ms 0.273 ms 
2579 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 101192 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47570 +Buddy_Erosion2D_Constant_Padding/1 0.308 ms 0.308 ms 2503 +Buddy_Dilation2D_Constant_Padding/1 0.292 ms 0.292 ms 2313 +Buddy_Opening2D_Constant_Padding/1 0.503 ms 0.503 ms 1118 +Buddy_Closing2D_Constant_Padding/1 0.471 ms 0.471 ms 1097 +Buddy_TopHat2D_Constant_Padding/1 0.944 ms 0.944 ms 739 +Buddy_BottomHat2D_Constant_Padding/1 0.939 ms 0.939 ms 700 +OpenCV_Erode2D_Constant_Padding/1 0.145 ms 0.145 ms 4835 +OpenCV_Opening2D_Constant_Padding/1 0.234 ms 0.234 ms 3096 +OpenCV_Closing2D_Constant_Padding/1 0.226 ms 0.226 ms 3103 +OpenCV_TopHat2D_Constant_Padding/1 0.263 ms 0.263 ms 2686 +OpenCV_BottomHat2D_Constant_Padding/1 0.264 ms 0.264 ms 2642 +OpenCV_MorphGrad2D_Constant_Padding/1 0.262 ms 0.262 ms 2681 +OpenCV_Dilate2D_Constant_Padding/1 0.142 ms 0.142 ms 4948 Saved PNG file. Saved PNG file. Saved PNG file. diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json index 86e96502..7142618a 100644 --- a/test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json +++ b/test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-09-07T13:12:00+00:00", + "date": "2025-09-07T14:28:14+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [2.81396,3.41357,5.22266], + "load_avg": [3.08936,3.45215,4.41846], "library_build_type": "release" }, "benchmarks": [ @@ -46,8 +46,8 @@ "repetition_index": 0, "threads": 1, "iterations": 19, - "real_time": 3.5926808652124905e+01, - "cpu_time": 3.5924946894736848e+01, + "real_time": 
3.6411155799501820e+01, + "cpu_time": 3.6410112789473686e+01, "time_unit": "ms" }, { @@ -60,8 +60,8 @@ "repetition_index": 0, "threads": 1, "iterations": 6, - "real_time": 1.2407693142692249e+02, - "cpu_time": 1.2407143333333333e+02, + "real_time": 1.2272199243307114e+02, + "cpu_time": 1.2271996516666665e+02, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 174, - "real_time": 4.0391601365188077e+00, - "cpu_time": 4.0390005804597706e+00, + "iterations": 164, + "real_time": 4.2625103463850369e+00, + "cpu_time": 4.2624786829268295e+00, "time_unit": "ms" }, { @@ -87,9 +87,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 86, - "real_time": 8.1013258286686831e+00, - "cpu_time": 8.1008378488372106e+00, + "iterations": 87, + "real_time": 8.0896045627265138e+00, + "cpu_time": 8.0895408160919526e+00, "time_unit": "ms" }, { @@ -101,9 +101,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 115, - "real_time": 5.9959024190902710e+00, - "cpu_time": 5.9957746086956556e+00, + "iterations": 116, + "real_time": 6.0323840328331650e+00, + "cpu_time": 6.0323151551724177e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4686, - "real_time": 1.4948366170910449e-01, - "cpu_time": 1.4947746414852756e-01, + "iterations": 4740, + "real_time": 1.4794642962358168e-01, + "cpu_time": 1.4794533839662441e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2562, - "real_time": 2.7427126585878869e-01, - "cpu_time": 2.7425979039812648e-01, + "iterations": 2578, + "real_time": 2.7431065723866432e-01, + "cpu_time": 2.7430675096974405e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 101826, - "real_time": 6.8600357443400397e-03, - "cpu_time": 6.8597227525386429e-03, + "iterations": 
101183, + "real_time": 6.9150012404555119e-03, + "cpu_time": 6.9149498927685484e-03, "time_unit": "ms" }, { @@ -157,9 +157,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 47952, - "real_time": 1.4625412179234826e-02, - "cpu_time": 1.4625162954621277e-02, + "iterations": 47997, + "real_time": 1.4597131495282340e-02, + "cpu_time": 1.4596917932370775e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2816, - "real_time": 2.4167992656161499e-01, - "cpu_time": 2.4167627130681854e-01, + "iterations": 2553, + "real_time": 2.8082992789608052e-01, + "cpu_time": 2.8082769095182175e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2869, - "real_time": 2.4284223862623411e-01, - "cpu_time": 2.4281936911816007e-01, + "iterations": 2317, + "real_time": 2.6909916641180198e-01, + "cpu_time": 2.6909711523521840e-01, "time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1716, - "real_time": 3.8663555796329790e-01, - "cpu_time": 3.8661013053613069e-01, + "iterations": 1198, + "real_time": 4.8579790867330236e-01, + "cpu_time": 4.8579480467445768e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1845, - "real_time": 3.9366766366209116e-01, - "cpu_time": 3.9364567262872591e-01, + "iterations": 1256, + "real_time": 4.3843196005008783e-01, + "cpu_time": 4.3842444347133841e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 681, - "real_time": 1.0337662909086931e+00, - "cpu_time": 1.0336980954478709e+00, + "iterations": 721, + "real_time": 9.3752561180998317e-01, + "cpu_time": 9.3750926213592223e-01, "time_unit": "ms" }, { @@ -241,9 +241,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 743, - "real_time": 
9.9821302984636850e-01, - "cpu_time": 9.9815262180349729e-01, + "iterations": 740, + "real_time": 9.3545040770156962e-01, + "cpu_time": 9.3543432297297435e-01, "time_unit": "ms" }, { @@ -255,9 +255,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4848, - "real_time": 1.4389439781039659e-01, - "cpu_time": 1.4388865037128742e-01, + "iterations": 4947, + "real_time": 1.4094690816171584e-01, + "cpu_time": 1.4094593612290271e-01, "time_unit": "ms" }, { @@ -269,9 +269,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3034, - "real_time": 2.3212716666074198e-01, - "cpu_time": 2.3211989123269525e-01, + "iterations": 3152, + "real_time": 2.2193788063858971e-01, + "cpu_time": 2.2193630583756352e-01, "time_unit": "ms" }, { @@ -283,9 +283,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3013, - "real_time": 2.3206682561087205e-01, - "cpu_time": 2.3205983637570513e-01, + "iterations": 3129, + "real_time": 2.2150588496571799e-01, + "cpu_time": 2.2150361585171055e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2605, - "real_time": 2.6449215475062260e-01, - "cpu_time": 2.6447615969289784e-01, + "iterations": 2742, + "real_time": 2.5509349882254889e-01, + "cpu_time": 2.5509168016046663e-01, "time_unit": "ms" }, { @@ -311,9 +311,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2599, - "real_time": 2.6553470344670049e-01, - "cpu_time": 2.6552657983839900e-01, + "iterations": 2746, + "real_time": 2.5341272739407272e-01, + "cpu_time": 2.5341093226511219e-01, "time_unit": "ms" }, { @@ -325,9 +325,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2579, - "real_time": 2.7055674661911472e-01, - "cpu_time": 2.7054348817371016e-01, + "iterations": 2689, + "real_time": 2.6112427472979571e-01, + "cpu_time": 2.6112239866121267e-01, "time_unit": "ms" }, { @@ -339,9 +339,9 @@ "repetitions": 1, 
"repetition_index": 0, "threads": 1, - "iterations": 4823, - "real_time": 1.4378969109011419e-01, - "cpu_time": 1.4378343479162370e-01, + "iterations": 4988, + "real_time": 1.4059869355933610e-01, + "cpu_time": 1.4059724599037698e-01, "time_unit": "ms" } ] diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log b/test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log index 34454b9e..268cf92d 100644 --- a/test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log +++ b/test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_CONSTANT_PADDING.log @@ -1,4 +1,4 @@ -2025-09-07T13:12:00+00:00 +2025-09-07T14:28:14+00:00 Running ./bin/image-processing-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,33 +6,33 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 2.81, 3.41, 5.22 +Load Average: 3.09, 3.45, 4.42 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
-------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------------------- -Eigen_Convolve2D/1 35.9 ms 35.9 ms 19 -MLIR_Conv2D/1 124 ms 124 ms 6 -Buddy_Conv2D/1 4.04 ms 4.04 ms 174 -Buddy_Corr2D_Constant_Padding/1 8.10 ms 8.10 ms 86 -OpenCV_Filter2D_Constant_Padding/1 6.00 ms 6.00 ms 115 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.149 ms 0.149 ms 4686 -Buddy_Resize2D_Bilinear_Interpolation/1 0.274 ms 0.274 ms 2562 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 101826 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47952 -Buddy_Erosion2D_Constant_Padding/1 0.242 ms 0.242 ms 2816 -Buddy_Dilation2D_Constant_Padding/1 0.243 ms 0.243 ms 2869 -Buddy_Opening2D_Constant_Padding/1 0.387 ms 0.387 ms 1716 -Buddy_Closing2D_Constant_Padding/1 0.394 ms 0.394 ms 1845 -Buddy_TopHat2D_Constant_Padding/1 1.03 ms 1.03 ms 681 -Buddy_BottomHat2D_Constant_Padding/1 0.998 ms 0.998 ms 743 -OpenCV_Erode2D_Constant_Padding/1 0.144 ms 0.144 ms 4848 -OpenCV_Opening2D_Constant_Padding/1 0.232 ms 0.232 ms 3034 -OpenCV_Closing2D_Constant_Padding/1 0.232 ms 0.232 ms 3013 -OpenCV_TopHat2D_Constant_Padding/1 0.264 ms 0.264 ms 2605 -OpenCV_BottomHat2D_Constant_Padding/1 0.266 ms 0.266 ms 2599 -OpenCV_MorphGrad2D_Constant_Padding/1 0.271 ms 0.271 ms 2579 -OpenCV_Dilate2D_Constant_Padding/1 0.144 ms 0.144 ms 4823 +Eigen_Convolve2D/1 36.4 ms 36.4 ms 19 +MLIR_Conv2D/1 123 ms 123 ms 6 +Buddy_Conv2D/1 4.26 ms 4.26 ms 164 +Buddy_Corr2D_Constant_Padding/1 8.09 ms 8.09 ms 87 +OpenCV_Filter2D_Constant_Padding/1 6.03 ms 6.03 ms 116 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.148 ms 0.148 ms 4740 +Buddy_Resize2D_Bilinear_Interpolation/1 0.274 ms 0.274 ms 2578 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 101183 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47997 
+Buddy_Erosion2D_Constant_Padding/1 0.281 ms 0.281 ms 2553 +Buddy_Dilation2D_Constant_Padding/1 0.269 ms 0.269 ms 2317 +Buddy_Opening2D_Constant_Padding/1 0.486 ms 0.486 ms 1198 +Buddy_Closing2D_Constant_Padding/1 0.438 ms 0.438 ms 1256 +Buddy_TopHat2D_Constant_Padding/1 0.938 ms 0.938 ms 721 +Buddy_BottomHat2D_Constant_Padding/1 0.935 ms 0.935 ms 740 +OpenCV_Erode2D_Constant_Padding/1 0.141 ms 0.141 ms 4947 +OpenCV_Opening2D_Constant_Padding/1 0.222 ms 0.222 ms 3152 +OpenCV_Closing2D_Constant_Padding/1 0.222 ms 0.222 ms 3129 +OpenCV_TopHat2D_Constant_Padding/1 0.255 ms 0.255 ms 2742 +OpenCV_BottomHat2D_Constant_Padding/1 0.253 ms 0.253 ms 2746 +OpenCV_MorphGrad2D_Constant_Padding/1 0.261 ms 0.261 ms 2689 +OpenCV_Dilate2D_Constant_Padding/1 0.141 ms 0.141 ms 4988 Saved PNG file. Saved PNG file. Saved PNG file. diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json b/test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json index 2251ffdf..d7045e77 100644 --- a/test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json +++ b/test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-09-07T13:12:25+00:00", + "date": "2025-09-07T14:28:38+00:00", "host_name": "4ed4bacfe45d", "executable": "./bin/image-processing-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [2.86768,3.38574,5.17383], + "load_avg": [3.06299,3.42188,4.38721], "library_build_type": "release" }, "benchmarks": [ @@ -46,8 +46,8 @@ "repetition_index": 0, "threads": 1, "iterations": 19, - "real_time": 3.6201816836470051e+01, - "cpu_time": 3.6201301105263163e+01, + "real_time": 3.6139344306368578e+01, + "cpu_time": 3.6132733684210528e+01, "time_unit": "ms" }, { @@ -60,8 +60,8 @@ "repetition_index": 0, "threads": 1, "iterations": 
6, - "real_time": 1.2358936294913292e+02, - "cpu_time": 1.2358072799999997e+02, + "real_time": 1.2281656203170617e+02, + "cpu_time": 1.2280101033333335e+02, "time_unit": "ms" }, { @@ -73,9 +73,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 164, - "real_time": 4.2949264369359827e+00, - "cpu_time": 4.2947466402439005e+00, + "iterations": 174, + "real_time": 4.0128050136497651e+00, + "cpu_time": 4.0122547816091965e+00, "time_unit": "ms" }, { @@ -87,9 +87,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 87, - "real_time": 8.1035070035649444e+00, - "cpu_time": 8.1031966321839040e+00, + "iterations": 86, + "real_time": 8.0886327683232544e+00, + "cpu_time": 8.0876173023255777e+00, "time_unit": "ms" }, { @@ -101,9 +101,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 114, - "real_time": 6.0135007166025929e+00, - "cpu_time": 6.0134500877193009e+00, + "iterations": 116, + "real_time": 6.0190088276205396e+00, + "cpu_time": 6.0181940862068943e+00, "time_unit": "ms" }, { @@ -115,9 +115,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4610, - "real_time": 1.5021250573130338e-01, - "cpu_time": 1.5020438633405644e-01, + "iterations": 4751, + "real_time": 1.4789280477912017e-01, + "cpu_time": 1.4787357756261846e-01, "time_unit": "ms" }, { @@ -129,9 +129,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2555, - "real_time": 2.7383145549992527e-01, - "cpu_time": 2.7382016086105654e-01, + "iterations": 2581, + "real_time": 2.7365781617968271e-01, + "cpu_time": 2.7361916388996527e-01, "time_unit": "ms" }, { @@ -143,9 +143,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 101810, - "real_time": 6.8629608209563337e-03, - "cpu_time": 6.8627586877516959e-03, + "iterations": 101075, + "real_time": 6.9230752152851715e-03, + "cpu_time": 6.9229836655948533e-03, "time_unit": "ms" }, { @@ -157,9 +157,9 @@ "repetitions": 1, 
"repetition_index": 0, "threads": 1, - "iterations": 48007, - "real_time": 1.4609261997405542e-02, - "cpu_time": 1.4608820255379421e-02, + "iterations": 47881, + "real_time": 1.4611373583860823e-02, + "cpu_time": 1.4611187402101052e-02, "time_unit": "ms" }, { @@ -171,9 +171,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2937, - "real_time": 2.5447246713381783e-01, - "cpu_time": 2.5446095233231197e-01, + "iterations": 2510, + "real_time": 3.0442776225240109e-01, + "cpu_time": 3.0439000358565693e-01, "time_unit": "ms" }, { @@ -185,9 +185,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2835, - "real_time": 2.5096868265032557e-01, - "cpu_time": 2.5096189135802438e-01, + "iterations": 2091, + "real_time": 2.6490774481334167e-01, + "cpu_time": 2.6486507604017184e-01, "time_unit": "ms" }, { @@ -199,9 +199,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1713, - "real_time": 4.0167628396552552e-01, - "cpu_time": 4.0166695913601819e-01, + "iterations": 1262, + "real_time": 4.7815910196625488e-01, + "cpu_time": 4.7810812282091797e-01, "time_unit": "ms" }, { @@ -213,9 +213,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 1732, - "real_time": 3.8730230733029564e-01, - "cpu_time": 3.8729183602771361e-01, + "iterations": 1152, + "real_time": 4.8954437241061693e-01, + "cpu_time": 4.8945949305555547e-01, "time_unit": "ms" }, { @@ -227,9 +227,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 678, - "real_time": 1.0340484875669169e+00, - "cpu_time": 1.0340169734513285e+00, + "iterations": 713, + "real_time": 9.2543011627892791e-01, + "cpu_time": 9.2528858064516162e-01, "time_unit": "ms" }, { @@ -241,9 +241,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 688, - "real_time": 1.0315462307961181e+00, - "cpu_time": 1.0315082732558134e+00, + "iterations": 733, + "real_time": 9.2815726710862057e-01, + "cpu_time": 
9.2803548840382022e-01, "time_unit": "ms" }, { @@ -255,9 +255,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4897, - "real_time": 1.4402208728451424e-01, - "cpu_time": 1.4401725382887512e-01, + "iterations": 4956, + "real_time": 1.4103952227002484e-01, + "cpu_time": 1.4102089689265529e-01, "time_unit": "ms" }, { @@ -269,9 +269,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3175, - "real_time": 2.1922240811070121e-01, - "cpu_time": 2.1921639622047273e-01, + "iterations": 3097, + "real_time": 2.2641591508811160e-01, + "cpu_time": 2.2638668517920651e-01, "time_unit": "ms" }, { @@ -283,9 +283,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 3139, - "real_time": 2.1930135176530421e-01, - "cpu_time": 2.1929385313794220e-01, + "iterations": 3063, + "real_time": 2.2829655792601858e-01, + "cpu_time": 2.2829427815866776e-01, "time_unit": "ms" }, { @@ -297,9 +297,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2708, - "real_time": 2.5645486922436661e-01, - "cpu_time": 2.5644250147710440e-01, + "iterations": 2628, + "real_time": 2.6419852495964624e-01, + "cpu_time": 2.6419667199391139e-01, "time_unit": "ms" }, { @@ -311,9 +311,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2786, - "real_time": 2.5142605061464318e-01, - "cpu_time": 2.5141716403445868e-01, + "iterations": 2623, + "real_time": 2.6604910979414548e-01, + "cpu_time": 2.6604722645825318e-01, "time_unit": "ms" }, { @@ -325,9 +325,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 2645, - "real_time": 2.6354777762254378e-01, - "cpu_time": 2.6354556105860183e-01, + "iterations": 2690, + "real_time": 2.6090611395561120e-01, + "cpu_time": 2.6090426319702570e-01, "time_unit": "ms" }, { @@ -339,9 +339,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 4951, - "real_time": 1.4152701088597716e-01, - "cpu_time": 1.4151987901434010e-01, + 
"iterations": 4952, + "real_time": 1.4145706474829181e-01, + "cpu_time": 1.4145561409531551e-01, "time_unit": "ms" } ] diff --git a/test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log b/test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log index f84fcd1c..34c65b95 100644 --- a/test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log +++ b/test_result/imageprocessing/AVX2_YuTu_sobel9x9KernelAlign_random3x3KernelAlignInt_REPLICATE_PADDING.log @@ -1,4 +1,4 @@ -2025-09-07T13:12:25+00:00 +2025-09-07T14:28:38+00:00 Running ./bin/image-processing-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,33 +6,33 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 2.87, 3.39, 5.17 +Load Average: 3.06, 3.42, 4.39 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
-------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------------------- -Eigen_Convolve2D/1 36.2 ms 36.2 ms 19 -MLIR_Conv2D/1 124 ms 124 ms 6 -Buddy_Conv2D/1 4.29 ms 4.29 ms 164 -Buddy_Corr2D_Constant_Padding/1 8.10 ms 8.10 ms 87 -OpenCV_Filter2D_Constant_Padding/1 6.01 ms 6.01 ms 114 -Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.150 ms 0.150 ms 4610 -Buddy_Resize2D_Bilinear_Interpolation/1 0.274 ms 0.274 ms 2555 -OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 101810 -OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 48007 -Buddy_Erosion2D_Constant_Padding/1 0.254 ms 0.254 ms 2937 -Buddy_Dilation2D_Constant_Padding/1 0.251 ms 0.251 ms 2835 -Buddy_Opening2D_Constant_Padding/1 0.402 ms 0.402 ms 1713 -Buddy_Closing2D_Constant_Padding/1 0.387 ms 0.387 ms 1732 -Buddy_TopHat2D_Constant_Padding/1 1.03 ms 1.03 ms 678 -Buddy_BottomHat2D_Constant_Padding/1 1.03 ms 1.03 ms 688 -OpenCV_Erode2D_Constant_Padding/1 0.144 ms 0.144 ms 4897 -OpenCV_Opening2D_Constant_Padding/1 0.219 ms 0.219 ms 3175 -OpenCV_Closing2D_Constant_Padding/1 0.219 ms 0.219 ms 3139 -OpenCV_TopHat2D_Constant_Padding/1 0.256 ms 0.256 ms 2708 -OpenCV_BottomHat2D_Constant_Padding/1 0.251 ms 0.251 ms 2786 -OpenCV_MorphGrad2D_Constant_Padding/1 0.264 ms 0.264 ms 2645 -OpenCV_Dilate2D_Constant_Padding/1 0.142 ms 0.142 ms 4951 +Eigen_Convolve2D/1 36.1 ms 36.1 ms 19 +MLIR_Conv2D/1 123 ms 123 ms 6 +Buddy_Conv2D/1 4.01 ms 4.01 ms 174 +Buddy_Corr2D_Constant_Padding/1 8.09 ms 8.09 ms 86 +OpenCV_Filter2D_Constant_Padding/1 6.02 ms 6.02 ms 116 +Buddy_Resize2D_Nearest_Neighbour_Interpolation/1 0.148 ms 0.148 ms 4751 +Buddy_Resize2D_Bilinear_Interpolation/1 0.274 ms 0.274 ms 2581 +OpenCV_Resize2D_Nearest_Neighbour_Interpolation/1 0.007 ms 0.007 ms 101075 +OpenCV_Resize2D_Bilinear_Interpolation/1 0.015 ms 0.015 ms 47881 
+Buddy_Erosion2D_Constant_Padding/1 0.304 ms 0.304 ms 2510 +Buddy_Dilation2D_Constant_Padding/1 0.265 ms 0.265 ms 2091 +Buddy_Opening2D_Constant_Padding/1 0.478 ms 0.478 ms 1262 +Buddy_Closing2D_Constant_Padding/1 0.490 ms 0.489 ms 1152 +Buddy_TopHat2D_Constant_Padding/1 0.925 ms 0.925 ms 713 +Buddy_BottomHat2D_Constant_Padding/1 0.928 ms 0.928 ms 733 +OpenCV_Erode2D_Constant_Padding/1 0.141 ms 0.141 ms 4956 +OpenCV_Opening2D_Constant_Padding/1 0.226 ms 0.226 ms 3097 +OpenCV_Closing2D_Constant_Padding/1 0.228 ms 0.228 ms 3063 +OpenCV_TopHat2D_Constant_Padding/1 0.264 ms 0.264 ms 2628 +OpenCV_BottomHat2D_Constant_Padding/1 0.266 ms 0.266 ms 2623 +OpenCV_MorphGrad2D_Constant_Padding/1 0.261 ms 0.261 ms 2690 +OpenCV_Dilate2D_Constant_Padding/1 0.141 ms 0.141 ms 4952 Saved PNG file. Saved PNG file. Saved PNG file. diff --git a/test_result/imageprocessing/image-processing-result.log b/test_result/imageprocessing/image-processing-result.log index e709d0c2..8b8610c9 100644 --- a/test_result/imageprocessing/image-processing-result.log +++ b/test_result/imageprocessing/image-processing-result.log @@ -85,3 +85,45 @@ Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel5x5KernelAlign rando Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel5x5KernelAlign random3x3KernelAlignInt REPLICATE_PADDING [Success] … Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel5x5KernelAlign random3x3KernelAlignInt REPLICATE_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel7x7KernelAlign random3x3KernelAlignInt CONSTANT_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel7x7KernelAlign random3x3KernelAlignInt CONSTANT_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel7x7KernelAlign random3x3KernelAlignInt REPLICATE_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel7x7KernelAlign random3x3KernelAlignInt REPLICATE_PADDING +[Success] … +Running: 
../benchmarks/ImageProcessing/Images/YuTu.png sobel9x9KernelAlign random3x3KernelAlignInt CONSTANT_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel9x9KernelAlign random3x3KernelAlignInt CONSTANT_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel9x9KernelAlign random3x3KernelAlignInt REPLICATE_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png sobel9x9KernelAlign random3x3KernelAlignInt REPLICATE_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png laplacianKernelAlign random3x3KernelAlignInt CONSTANT_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png laplacianKernelAlign random3x3KernelAlignInt CONSTANT_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png laplacianKernelAlign random3x3KernelAlignInt REPLICATE_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png laplacianKernelAlign random3x3KernelAlignInt REPLICATE_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png logKernelAlign random3x3KernelAlignInt CONSTANT_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png logKernelAlign random3x3KernelAlignInt CONSTANT_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png logKernelAlign random3x3KernelAlignInt REPLICATE_PADDING +[Success] … +Running: ../benchmarks/ImageProcessing/Images/YuTu.png logKernelAlign random3x3KernelAlignInt REPLICATE_PADDING +[Success] … +Testing AVX512 support +CPU does not support AVX512. +Testing NEON support +CPU does not support NEON. +[Success] … +Testing AVX512 support +CPU does not support AVX512. +Testing NEON support +CPU does not support NEON. 
diff --git a/test_result/vectorization/vectorization_matrix.json b/test_result/vectorization/vectorization_matrix.json index e63f2898..90867db0 100644 --- a/test_result/vectorization/vectorization_matrix.json +++ b/test_result/vectorization/vectorization_matrix.json @@ -1,6 +1,6 @@ { "context": { - "date": "2025-09-07T13:14:33+00:00", + "date": "2025-09-07T14:30:43+00:00", "host_name": "4ed4bacfe45d", "executable": "./vectorization-matrix-benchmark", "num_cpus": 24, @@ -32,7 +32,7 @@ "num_sharing": 24 } ], - "load_avg": [3.06299,3.26074,4.88916], + "load_avg": [2.97803,3.27148,4.20654], "library_build_type": "release" }, "benchmarks": [ @@ -45,9 +45,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 36245532, - "real_time": 1.9330042278500677e+01, - "cpu_time": 1.9329765362527993e+01, + "iterations": 36434213, + "real_time": 1.9358822269403905e+01, + "cpu_time": 1.9357879145077188e+01, "time_unit": "ns" }, { @@ -59,9 +59,9 @@ "repetitions": 1, "repetition_index": 0, "threads": 1, - "iterations": 33833597, - "real_time": 2.1012985950814887e+01, - "cpu_time": 2.1012694364125689e+01, + "iterations": 34006039, + "real_time": 2.0755498137698094e+01, + "cpu_time": 2.0755264822227605e+01, "time_unit": "ns" } ] diff --git a/test_result/vectorization/vectorization_matrix.log b/test_result/vectorization/vectorization_matrix.log index 11e33aa6..3fa79ef0 100644 --- a/test_result/vectorization/vectorization_matrix.log +++ b/test_result/vectorization/vectorization_matrix.log @@ -1,4 +1,4 @@ -2025-09-07T13:14:33+00:00 +2025-09-07T14:30:43+00:00 Running ./vectorization-matrix-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -6,13 +6,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 3.06, 3.26, 4.89 +Load Average: 2.98, 3.27, 4.21 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
-------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------- -MLIR_MatMul/1 19.3 ns 19.3 ns 36245532 -MLIR_MatVec/1 21.0 ns 21.0 ns 33833597 +MLIR_MatMul/1 19.4 ns 19.4 ns 36434213 +MLIR_MatVec/1 20.8 ns 20.8 ns 34006039 -------------------------------------------------------- MLIR_MatMul: MLIR MatMul Operation + Nested Loop [ 18 18 18 18 18 18 18 18 18 18 ] diff --git a/test_result/vectorization/vectorization_result.log b/test_result/vectorization/vectorization_result.log index fd5ab9b0..350170f5 100644 --- a/test_result/vectorization/vectorization_result.log +++ b/test_result/vectorization/vectorization_result.log @@ -1,6 +1,20 @@ -Vectorization Benchmark - Sun Sep 7 13:14:27 UTC 2025 +Vectorization Benchmark - Sun Sep 7 14:30:36 UTC 2025 [Info] Starting vectorization-matrix-benchmark build... [Info] Running CMake configuration... +-- Detecting CXX compiler ABI info - failed +-- Check for working CXX compiler: /usr/bin/c++ +CMake Error: CMAKE_CXX_COMPILER not set, after EnableLanguage +CMake Error at /usr/share/cmake-3.22/Modules/CMakeTestCXXCompiler.cmake:49 (try_compile): + Failed to configure test project build system. +Call Stack (most recent call first): + CMakeLists.txt:11 (project) + + +-- Configuring incomplete, errors occurred! +See also "/home/buddy-complier-workspace/buddy-benchmark/build/CMakeFiles/CMakeOutput.log". +See also "/home/buddy-complier-workspace/buddy-benchmark/build/CMakeFiles/CMakeError.log". +[Info] Building vectorization-matrix-benchmark... 
+ninja: error: loading 'build.ninja': No such file or directory -- The CXX compiler identification is GNU 11.4.0 -- The C compiler identification is GNU 11.4.0 -- Detecting CXX compiler ABI info @@ -129,22 +143,22 @@ Call Stack (most recent call first): -- Build files have been written to: /home/buddy-complier-workspace/buddy-benchmark/build/vendor/benchmark/src/project_googlebenchmark-build [10/17] Performing build step for 'project_googlebenchmark' [1/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark_name.cc.o -[2/22] Building CXX object src/CMakeFiles/benchmark_main.dir/benchmark_main.cc.o -[3/22] Building CXX object src/CMakeFiles/benchmark.dir/sleep.cc.o -[4/22] Building CXX object src/CMakeFiles/benchmark.dir/perf_counters.cc.o -[5/22] Building CXX object src/CMakeFiles/benchmark.dir/timers.cc.o -[6/22] Building CXX object src/CMakeFiles/benchmark.dir/colorprint.cc.o -[7/22] Building CXX object src/CMakeFiles/benchmark.dir/counter.cc.o -[8/22] Building CXX object src/CMakeFiles/benchmark.dir/csv_reporter.cc.o -[9/22] Building CXX object src/CMakeFiles/benchmark.dir/reporter.cc.o -[10/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark_api_internal.cc.o -[11/22] Building CXX object src/CMakeFiles/benchmark.dir/commandlineflags.cc.o +[2/22] Building CXX object src/CMakeFiles/benchmark.dir/sleep.cc.o +[3/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark_api_internal.cc.o +[4/22] Building CXX object src/CMakeFiles/benchmark_main.dir/benchmark_main.cc.o +[5/22] Building CXX object src/CMakeFiles/benchmark.dir/colorprint.cc.o +[6/22] Building CXX object src/CMakeFiles/benchmark.dir/timers.cc.o +[7/22] Building CXX object src/CMakeFiles/benchmark.dir/perf_counters.cc.o +[8/22] Building CXX object src/CMakeFiles/benchmark.dir/counter.cc.o +[9/22] Building CXX object src/CMakeFiles/benchmark.dir/string_util.cc.o +[10/22] Building CXX object src/CMakeFiles/benchmark.dir/commandlineflags.cc.o +[11/22] Building CXX object 
src/CMakeFiles/benchmark.dir/reporter.cc.o [12/22] Building CXX object src/CMakeFiles/benchmark.dir/console_reporter.cc.o -[13/22] Building CXX object src/CMakeFiles/benchmark.dir/string_util.cc.o -[14/22] Building CXX object src/CMakeFiles/benchmark.dir/complexity.cc.o -[15/22] Building CXX object src/CMakeFiles/benchmark.dir/statistics.cc.o -[16/22] Building CXX object src/CMakeFiles/benchmark.dir/json_reporter.cc.o -[17/22] Building CXX object src/CMakeFiles/benchmark.dir/sysinfo.cc.o +[13/22] Building CXX object src/CMakeFiles/benchmark.dir/csv_reporter.cc.o +[14/22] Building CXX object src/CMakeFiles/benchmark.dir/json_reporter.cc.o +[15/22] Building CXX object src/CMakeFiles/benchmark.dir/complexity.cc.o +[16/22] Building CXX object src/CMakeFiles/benchmark.dir/sysinfo.cc.o +[17/22] Building CXX object src/CMakeFiles/benchmark.dir/statistics.cc.o [18/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark_runner.cc.o [19/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark.cc.o [20/22] Building CXX object src/CMakeFiles/benchmark.dir/benchmark_register.cc.o @@ -180,7 +194,7 @@ Call Stack (most recent call first): [16/17] Building CXX object benchmarks/Vectorization/CMakeFiles/vectorization-matrix-benchmark.dir/MLIRMatVecBenchmark.cpp.o [17/17] Linking CXX executable bin/vectorization-matrix-benchmark [Info] Running vectorization-matrix-benchmark... -2025-09-07T13:14:33+00:00 +2025-09-07T14:30:43+00:00 Running ./vectorization-matrix-benchmark Run on (24 X 5100 MHz CPU s) CPU Caches: @@ -188,13 +202,13 @@ CPU Caches: L1 Instruction 32 KiB (x12) L2 Unified 1280 KiB (x12) L3 Unified 30720 KiB (x1) -Load Average: 3.06, 3.26, 4.89 +Load Average: 2.98, 3.27, 4.21 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
-------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------- -MLIR_MatMul/1 19.3 ns 19.3 ns 36245532 -MLIR_MatVec/1 21.0 ns 21.0 ns 33833597 +MLIR_MatMul/1 19.4 ns 19.4 ns 36434213 +MLIR_MatVec/1 20.8 ns 20.8 ns 34006039 -------------------------------------------------------- MLIR_MatMul: MLIR MatMul Operation + Nested Loop [ 18 18 18 18 18 18 18 18 18 18 ] From b88ff30a4c897fc819769f89be18311350f7fec4 Mon Sep 17 00:00:00 2001 From: Q Liu <52538137+LIUQyou@users.noreply.github.com> Date: Sun, 7 Sep 2025 17:23:40 +0200 Subject: [PATCH 50/52] update --- .github/workflows/watch-upstream.yml | 68 ++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 .github/workflows/watch-upstream.yml diff --git a/.github/workflows/watch-upstream.yml b/.github/workflows/watch-upstream.yml new file mode 100644 index 00000000..68c4ffeb --- /dev/null +++ b/.github/workflows/watch-upstream.yml @@ -0,0 +1,68 @@ +name: Watch upstream and trigger bench on change +on: + schedule: + - cron: "*/10 * * * *" # every 10 minutes + workflow_dispatch: + +permissions: + contents: write # to push state branch + actions: write # to dispatch workflows + +concurrency: + group: watch-upstream + cancel-in-progress: true + +jobs: + watch: + runs-on: self-hosted + steps: + - name: Checkout state branch (or create) + uses: actions/checkout@v4 + with: + ref: automation-state + fetch-depth: 0 + + - name: Get upstream HEAD SHAs + id: head + run: | + set -e + MLIR_SHA=$(git ls-remote https://github.com/buddy-compiler/buddy-mlir.git refs/heads/main | awk '{print $1}') + BENCH_UP_SHA=$(git ls-remote https://github.com/buddy-compiler/buddy-benchmark.git refs/heads/main | awk '{print $1}') + echo "mlir=${MLIR_SHA}" >> $GITHUB_OUTPUT + echo "bench=${BENCH_UP_SHA}" >> $GITHUB_OUTPUT + + - name: Load previous SHAs + id: prev + run: | + echo "prev_mlir=$(cat mlir.sha 2>/dev/null || echo none)" >> $GITHUB_OUTPUT + 
echo "prev_bench=$(cat bench.sha 2>/dev/null || echo none)" >> $GITHUB_OUTPUT + + - name: Decide if changed + id: decide + run: | + changed=false + if [ "${{ steps.head.outputs.mlir }}" != "${{ steps.prev.outputs.prev_mlir }}" ] || \ + [ "${{ steps.head.outputs.bench }}" != "${{ steps.prev.outputs.prev_bench }}" ]; then + changed=true + fi + echo "changed=$changed" >> $GITHUB_OUTPUT + + - name: Update state branch + if: steps.decide.outputs.changed == 'true' + run: | + printf "%s" "${{ steps.head.outputs.mlir }}" > mlir.sha + printf "%s" "${{ steps.head.outputs.bench }}" > bench.sha + git config user.name "github-actions[bot]" + git config user.email "41898282+github-actions[bot]@users.noreply.github.com" + git add mlir.sha bench.sha + git commit -m "state: mlir=${{ steps.head.outputs.mlir }} bench=${{ steps.head.outputs.bench }}" || echo "no changes" + git push origin HEAD:automation-state + + - name: Dispatch bench.yml in this repo + if: steps.decide.outputs.changed == 'true' + run: | + curl -sS -X POST \ + -H "Accept: application/vnd.github+json" \ + -H "Authorization: Bearer ${{ secrets.GITHUB_TOKEN }}" \ + https://api.github.com/repos/${{ github.repository }}/actions/workflows/bench.yml/dispatches \ + -d '{"ref":"main"}' From 593a995f8e08e9c948c0cf066d5153d375bddc75 Mon Sep 17 00:00:00 2001 From: Q Liu <52538137+LIUQyou@users.noreply.github.com> Date: Sun, 7 Sep 2025 17:27:33 +0200 Subject: [PATCH 51/52] update --- .github/workflows/bench.yml | 12 +++++++++++- .github/workflows/watch-upstream.yml | 22 +++++++++++++++++++--- 2 files changed, 30 insertions(+), 4 deletions(-) diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml index 77ae1b32..20035f68 100755 --- a/.github/workflows/bench.yml +++ b/.github/workflows/bench.yml @@ -5,6 +5,14 @@ on: push: # fire on any branch branches: ['**'] pull_request: + workflow_dispatch: + inputs: + upstream_repo: + description: "Which repo changed (mlir|benchmark)" + required: false + upstream_sha: + 
description: "Upstream commit SHA for result folder" + required: false jobs: bench: @@ -48,8 +56,10 @@ jobs: - name: Set BENCH_DATE and BENCH_DIR run: | bench_date=$(date +'%Y-%m-%d') + # Prefer a passed upstream SHA, otherwise use this workflow's SHA + run_sha="${{ github.event.inputs.upstream_sha || github.sha }}" echo "BENCH_DATE=$bench_date" >> "$GITHUB_ENV" - echo "BENCH_DIR=$HOME/buddy-complier-workspace/buddy-benchmark/site/benchmarks/$bench_date/${GITHUB_SHA}" >> "$GITHUB_ENV" + echo "BENCH_DIR=$HOME/buddy-complier-workspace/buddy-benchmark/site/benchmarks/$bench_date/${run_sha}" >> "$GITHUB_ENV" # ------------------------------------------------------------ # 3-5) (UNCHANGED) upload raw logs, convert to HTML, deploy Pages diff --git a/.github/workflows/watch-upstream.yml b/.github/workflows/watch-upstream.yml index 68c4ffeb..ad7649a1 100644 --- a/.github/workflows/watch-upstream.yml +++ b/.github/workflows/watch-upstream.yml @@ -16,12 +16,16 @@ jobs: watch: runs-on: self-hosted steps: - - name: Checkout state branch (or create) + - name: Checkout default branch uses: actions/checkout@v4 with: - ref: automation-state + ref: main # adjust if your default branch differs fetch-depth: 0 + - name: Create/switch to automation-state branch + run: | + git checkout -B automation-state + - name: Get upstream HEAD SHAs id: head run: | @@ -47,6 +51,18 @@ jobs: fi echo "changed=$changed" >> $GITHUB_OUTPUT + - name: Determine which repo changed + if: steps.decide.outputs.changed == 'true' + id: which + run: | + if [ "${{ steps.head.outputs.mlir }}" != "${{ steps.prev.outputs.prev_mlir }}" ]; then + echo "repo=mlir" >> $GITHUB_OUTPUT + echo "sha=${{ steps.head.outputs.mlir }}" >> $GITHUB_OUTPUT + else + echo "repo=benchmark" >> $GITHUB_OUTPUT + echo "sha=${{ steps.head.outputs.bench }}" >> $GITHUB_OUTPUT + fi + - name: Update state branch if: steps.decide.outputs.changed == 'true' run: | @@ -65,4 +81,4 @@ jobs: -H "Accept: application/vnd.github+json" \ -H "Authorization: 
Bearer ${{ secrets.GITHUB_TOKEN }}" \ https://api.github.com/repos/${{ github.repository }}/actions/workflows/bench.yml/dispatches \ - -d '{"ref":"main"}' + -d '{"ref":"main","inputs":{"upstream_repo":"${{ steps.which.outputs.repo }}","upstream_sha":"${{ steps.which.outputs.sha }}"}}' From a578af9f213c82fe60d923c33c1df0065623547e Mon Sep 17 00:00:00 2001 From: Q Liu <52538137+LIUQyou@users.noreply.github.com> Date: Sun, 7 Sep 2025 20:23:03 +0200 Subject: [PATCH 52/52] update --- .github/workflows/watch-upstream.yml | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/.github/workflows/watch-upstream.yml b/.github/workflows/watch-upstream.yml index ad7649a1..49a8c56d 100644 --- a/.github/workflows/watch-upstream.yml +++ b/.github/workflows/watch-upstream.yml @@ -24,7 +24,15 @@ jobs: - name: Create/switch to automation-state branch run: | - git checkout -B automation-state + set -e + git fetch origin automation-state || true + if git show-ref --verify --quiet refs/remotes/origin/automation-state; then + # Start from the remote state branch to avoid non-ff push + git checkout -B automation-state origin/automation-state + else + # First run: create a new state branch from current HEAD + git checkout -B automation-state + fi - name: Get upstream HEAD SHAs id: head @@ -66,13 +74,17 @@ jobs: - name: Update state branch if: steps.decide.outputs.changed == 'true' run: | + set -e printf "%s" "${{ steps.head.outputs.mlir }}" > mlir.sha printf "%s" "${{ steps.head.outputs.bench }}" > bench.sha git config user.name "github-actions[bot]" git config user.email "41898282+github-actions[bot]@users.noreply.github.com" git add mlir.sha bench.sha git commit -m "state: mlir=${{ steps.head.outputs.mlir }} bench=${{ steps.head.outputs.bench }}" || echo "no changes" - git push origin HEAD:automation-state + # Re-sync and push with lease to avoid non-ff failures if remote advanced + git fetch origin automation-state || true + git rebase 
origin/automation-state || true + git push --force-with-lease=refs/heads/automation-state origin HEAD:automation-state - name: Dispatch bench.yml in this repo if: steps.decide.outputs.changed == 'true'