Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 38 additions & 9 deletions .github/workflows/ccp-workflow.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ on:
options:
- 'test_arm32'
- 'test_s390x'
- 'test_ppc64le'
default: 'test_arm32'
log_level:
description: 'Log level'
Expand All @@ -32,7 +33,7 @@ jobs:
# ]
# name: ${{ matrix.system }} Build
# runs-on: ${{ matrix.runner }}
# if: ${{ github.event_name != 'workflow_dispatch' || !(github.event.inputs.job_to_run != 'test_arm32' || github.event.inputs.job_to_run != 'test_s390x' )}}
# if: ${{ github.event_name != 'workflow_dispatch' || !(github.event.inputs.job_to_run != 'test_arm32' || github.event.inputs.job_to_run != 'test_s390x' || github.event.inputs.job_to_run != 'test_ppc64le' )}}
# steps:
# - uses: actions/checkout@v5
# - name: cmake
Expand All @@ -51,7 +52,7 @@ jobs:
]
name: ${{ matrix.system }} Build
runs-on: ${{ matrix.runner }}
if: ${{ github.event_name != 'workflow_dispatch' || !(github.event.inputs.job_to_run != 'test_arm32' || github.event.inputs.job_to_run != 'test_s390x' )}}
if: ${{ github.event_name != 'workflow_dispatch' || !(github.event.inputs.job_to_run != 'test_arm32' || github.event.inputs.job_to_run != 'test_s390x' || github.event.inputs.job_to_run != 'test_ppc64le' )}}
steps:
- uses: actions/checkout@v5
- name: cmake
Expand All @@ -70,7 +71,7 @@ jobs:
# ]
# name: ${{ matrix.system }} Build
# runs-on: ${{ matrix.runner }}
# if: ${{ github.event_name != 'workflow_dispatch' || !(github.event.inputs.job_to_run != 'test_arm32' || github.event.inputs.job_to_run != 'test_s390x' )}}
# if: ${{ github.event_name != 'workflow_dispatch' || !(github.event.inputs.job_to_run != 'test_arm32' || github.event.inputs.job_to_run != 'test_s390x' || github.event.inputs.job_to_run != 'test_ppc64le' )}}
# steps:
# - uses: actions/checkout@v5
# - name: cmake
Expand All @@ -90,7 +91,7 @@ jobs:
# ]
# name: ${{ matrix.system }} Build
# runs-on: ${{ matrix.runner }}
# if: ${{ github.event_name != 'workflow_dispatch' || !(github.event.inputs.job_to_run != 'test_arm32' || github.event.inputs.job_to_run != 'test_s390x' )}}
# if: ${{ github.event_name != 'workflow_dispatch' || !(github.event.inputs.job_to_run != 'test_arm32' || github.event.inputs.job_to_run != 'test_s390x' || github.event.inputs.job_to_run != 'test_ppc64le' )}}
# defaults:
# run:
# shell: msys2 {0}
Expand All @@ -117,7 +118,7 @@ jobs:
# ]
# name: ${{ matrix.system }} Build
# runs-on: ${{ matrix.runner }}
# if: ${{ github.event_name != 'workflow_dispatch' || !(github.event.inputs.job_to_run != 'test_arm32' || github.event.inputs.job_to_run != 'test_s390x' )}}
# if: ${{ github.event_name != 'workflow_dispatch' || !(github.event.inputs.job_to_run != 'test_arm32' || github.event.inputs.job_to_run != 'test_s390x' || github.event.inputs.job_to_run != 'test_ppc64le' )}}
# steps:
# - uses: actions/checkout@v5
# - name: cmake
Expand All @@ -139,7 +140,7 @@ jobs:
]
name: ${{ matrix.system }} Build and Test
runs-on: ${{ matrix.runner }}
if: ${{ github.event_name != 'workflow_dispatch' || !(github.event.inputs.job_to_run != 'test_arm32' || github.event.inputs.job_to_run != 'test_s390x' )}}
if: ${{ github.event_name != 'workflow_dispatch' || !(github.event.inputs.job_to_run != 'test_arm32' || github.event.inputs.job_to_run != 'test_s390x' || github.event.inputs.job_to_run != 'test_ppc64le' )}}
steps:
- uses: actions/checkout@v5
- name: cmake
Expand All @@ -161,7 +162,7 @@ jobs:
]
name: ${{ matrix.system }} Build and Test
runs-on: ${{ matrix.runner }}
if: ${{ github.event_name != 'workflow_dispatch' || !(github.event.inputs.job_to_run != 'test_arm32' || github.event.inputs.job_to_run != 'test_s390x' )}}
if: ${{ github.event_name != 'workflow_dispatch' || !(github.event.inputs.job_to_run != 'test_arm32' || github.event.inputs.job_to_run != 'test_s390x' || github.event.inputs.job_to_run != 'test_ppc64le' )}}
steps:
- uses: actions/checkout@v5
- name: cmake
Expand All @@ -184,7 +185,7 @@ jobs:
]
name: ${{ matrix.system }} Build and Test
runs-on: ${{ matrix.runner }}
if: ${{ github.event_name != 'workflow_dispatch' || !(github.event.inputs.job_to_run != 'test_arm32' || github.event.inputs.job_to_run != 'test_s390x' )}}
if: ${{ github.event_name != 'workflow_dispatch' || !(github.event.inputs.job_to_run != 'test_arm32' || github.event.inputs.job_to_run != 'test_s390x' || github.event.inputs.job_to_run != 'test_ppc64le' )}}
defaults:
run:
shell: msys2 {0}
Expand Down Expand Up @@ -214,7 +215,7 @@ jobs:
]
name: ${{ matrix.system }} Build and Test
runs-on: ${{ matrix.runner }}
if: ${{ github.event_name != 'workflow_dispatch' || !(github.event.inputs.job_to_run != 'test_arm32' || github.event.inputs.job_to_run != 'test_s390x' )}}
if: ${{ github.event_name != 'workflow_dispatch' || !(github.event.inputs.job_to_run != 'test_arm32' || github.event.inputs.job_to_run != 'test_s390x' || github.event.inputs.job_to_run != 'test_ppc64le' )}}
steps:
- uses: actions/checkout@v5
- name: cmake
Expand Down Expand Up @@ -266,3 +267,31 @@ jobs:
cmake -DCMAKE_BUILD_TYPE=Release -DOJPH_BUILD_STREAM_EXPAND=ON -DOJPH_ENABLE_TIFF_SUPPORT=OFF -DOJPH_BUILD_TESTS=ON ..
make
ctest --output-on-failure
test_powerpc64le:
name: Linux-ppc64le Build and Test
strategy:
fail-fast: false
matrix:
include: [
{ dist: ubuntu22.04 },
{ dist: ubuntu24.04 },
{ dist: ubuntu_latest },
]
runs-on: ubuntu-latest
if: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.job_to_run == 'test_ppc64le' }}
steps:
- uses: actions/checkout@v5
- uses: uraimo/run-on-arch-action@v3
with:
arch: ppc64le
distro: ${{ matrix.dist }}
githubToken: ${{ github.token }}
install: |
apt-get update -q -y
apt-get install -q -y cmake make g++ libtiff-dev python3
run: |
cd build
cmake -DCMAKE_BUILD_TYPE=Release -DOJPH_BUILD_STREAM_EXPAND=ON -DOJPH_ENABLE_TIFF_SUPPORT=OFF -DOJPH_BUILD_TESTS=ON ..
make
ctest --output-on-failure
25 changes: 22 additions & 3 deletions src/core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,13 @@ file(GLOB CODESTREAM_SSE2 "codestream/*_sse2.cpp")
file(GLOB CODESTREAM_AVX "codestream/*_avx.cpp")
file(GLOB CODESTREAM_AVX2 "codestream/*_avx2.cpp")
file(GLOB CODESTREAM_WASM "codestream/*_wasm.cpp")
file(GLOB CODESTREAM_VSX "codestream/*_vsx.cpp")
file(GLOB CODING "coding/*.cpp" "coding/*.h")
file(GLOB CODING_SSSE3 "coding/*_ssse3.cpp")
file(GLOB CODING_WASM "coding/*_wasm.cpp")
file(GLOB CODING_AVX2 "coding/*_avx2.cpp")
file(GLOB CODING_AVX512 "coding/*_avx512.cpp")
file(GLOB CODING_VSX "coding/*_vsx.cpp")
file(GLOB COMMON "openjph/*.h")
file(GLOB OTHERS "others/*.cpp" "others/*.c")
file(GLOB TRANSFORM "transform/*.cpp" "transform/*.h")
Expand All @@ -19,10 +21,11 @@ file(GLOB TRANSFORM_AVX "transform/*_avx.cpp")
file(GLOB TRANSFORM_AVX2 "transform/*_avx2.cpp")
file(GLOB TRANSFORM_AVX512 "transform/*_avx512.cpp")
file(GLOB TRANSFORM_WASM "transform/*_wasm.cpp")
file(GLOB TRANSFORM_VSX "transform/*_vsx.cpp")

list(REMOVE_ITEM CODESTREAM ${CODESTREAM_SSE} ${CODESTREAM_SSE2} ${CODESTREAM_AVX} ${CODESTREAM_AVX2} ${CODESTREAM_WASM})
list(REMOVE_ITEM CODING ${CODING_SSSE3} ${CODING_WASM} ${CODING_AVX2} ${CODING_AVX512})
list(REMOVE_ITEM TRANSFORM ${TRANSFORM_SSE} ${TRANSFORM_SSE2} ${TRANSFORM_AVX} ${TRANSFORM_AVX2} ${TRANSFORM_AVX512} ${TRANSFORM_WASM})
list(REMOVE_ITEM CODESTREAM ${CODESTREAM_SSE} ${CODESTREAM_SSE2} ${CODESTREAM_AVX} ${CODESTREAM_AVX2} ${CODESTREAM_WASM} ${CODESTREAM_VSX})
list(REMOVE_ITEM CODING ${CODING_SSSE3} ${CODING_WASM} ${CODING_AVX2} ${CODING_AVX512} ${CODING_VSX})
list(REMOVE_ITEM TRANSFORM ${TRANSFORM_SSE} ${TRANSFORM_SSE2} ${TRANSFORM_AVX} ${TRANSFORM_AVX2} ${TRANSFORM_AVX512} ${TRANSFORM_WASM} ${TRANSFORM_VSX})
list(APPEND SOURCES ${CODESTREAM} ${CODING} ${COMMON} ${OTHERS} ${TRANSFORM})

source_group("codestream" FILES ${CODESTREAM})
Expand Down Expand Up @@ -112,6 +115,22 @@ else()

endif()

if (("${OJPH_TARGET_ARCH}" MATCHES "OJPH_ARCH_PPC64")
AND (CMAKE_SYSTEM_PROCESSOR MATCHES "ppc64le|powerpc64le"))
# Native 128-bit VSX kernels (see ojph_simd_vsx.h). Supported
# targets are POWER9 (ISA 3.0) and newer, little-endian only.
# The VSX block decoder is selected at run time based on the
# detected CPU level; see ojph_codeblock_fun.cpp.
list(APPEND SOURCES ${CODESTREAM_VSX} ${CODING_VSX} ${TRANSFORM_VSX})
source_group("codestream" FILES ${CODESTREAM_VSX})
source_group("coding" FILES ${CODING_VSX})
source_group("transform" FILES ${TRANSFORM_VSX})
set_source_files_properties(codestream/ojph_codestream_vsx.cpp PROPERTIES COMPILE_FLAGS "-mcpu=power9")
set_source_files_properties(coding/ojph_block_decoder_vsx.cpp PROPERTIES COMPILE_FLAGS "-mcpu=power9")
set_source_files_properties(transform/ojph_transform_vsx.cpp PROPERTIES COMPILE_FLAGS "-mcpu=power9")
set_source_files_properties(transform/ojph_colour_vsx.cpp PROPERTIES COMPILE_FLAGS "-mcpu=power9")
endif()

endif()

endif()
Expand Down
49 changes: 49 additions & 0 deletions src/core/codestream/ojph_codeblock_fun.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,16 +61,19 @@ namespace ojph {
void sse_mem_clear(void* addr, size_t count);
void avx_mem_clear(void* addr, size_t count);
void wasm_mem_clear(void* addr, size_t count);
void vsx_mem_clear(void* addr, size_t count);

//////////////////////////////////////////////////////////////////////////
ui32 gen_find_max_val32(ui32* address);
ui32 sse2_find_max_val32(ui32* address);
ui32 avx2_find_max_val32(ui32* address);
ui32 wasm_find_max_val32(ui32* address);
ui32 vsx_find_max_val32(ui32* address);
ui64 gen_find_max_val64(ui64* address);
ui64 sse2_find_max_val64(ui64* address);
ui64 avx2_find_max_val64(ui64* address);
ui64 wasm_find_max_val64(ui64* address);
ui64 vsx_find_max_val64(ui64* address);


//////////////////////////////////////////////////////////////////////////
Expand All @@ -88,8 +91,12 @@ namespace ojph {
float delta_inv, ui32 count, ui32* max_val);
void wasm_rev_tx_to_cb32(const void *sp, ui32 *dp, ui32 K_max,
float delta_inv, ui32 count, ui32* max_val);
void vsx_rev_tx_to_cb32(const void *sp, ui32 *dp, ui32 K_max,
float delta_inv, ui32 count, ui32* max_val);
void wasm_irv_tx_to_cb32(const void *sp, ui32 *dp, ui32 K_max,
float delta_inv, ui32 count, ui32* max_val);
void vsx_irv_tx_to_cb32(const void *sp, ui32 *dp, ui32 K_max,
float delta_inv, ui32 count, ui32* max_val);

void gen_rev_tx_to_cb64(const void *sp, ui64 *dp, ui32 K_max,
float delta_inv, ui32 count, ui64* max_val);
Expand All @@ -99,6 +106,8 @@ namespace ojph {
float delta_inv, ui32 count, ui64* max_val);
void wasm_rev_tx_to_cb64(const void *sp, ui64 *dp, ui32 K_max,
float delta_inv, ui32 count, ui64* max_val);
void vsx_rev_tx_to_cb64(const void *sp, ui64 *dp, ui32 K_max,
float delta_inv, ui32 count, ui64* max_val);

//////////////////////////////////////////////////////////////////////////
void gen_rev_tx_from_cb32(const ui32 *sp, void *dp, ui32 K_max,
Expand All @@ -115,8 +124,12 @@ namespace ojph {
float delta, ui32 count);
void wasm_rev_tx_from_cb32(const ui32 *sp, void *dp, ui32 K_max,
float delta, ui32 count);
void vsx_rev_tx_from_cb32(const ui32 *sp, void *dp, ui32 K_max,
float delta, ui32 count);
void wasm_irv_tx_from_cb32(const ui32 *sp, void *dp, ui32 K_max,
float delta, ui32 count);
void vsx_irv_tx_from_cb32(const ui32 *sp, void *dp, ui32 K_max,
float delta, ui32 count);

void gen_rev_tx_from_cb64(const ui64 *sp, void *dp, ui32 K_max,
float delta, ui32 count);
Expand All @@ -128,6 +141,8 @@ namespace ojph {
float delta, ui32 count);
void wasm_rev_tx_from_cb64(const ui64 *sp, void *dp, ui32 K_max,
float delta, ui32 count);
void vsx_rev_tx_from_cb64(const ui64 *sp, void *dp, ui32 K_max,
float delta, ui32 count);

void codeblock_fun::init(bool reversible) {

Expand Down Expand Up @@ -246,6 +261,40 @@ namespace ojph {

#elif defined(OJPH_ARCH_ARM)

#elif defined(OJPH_ARCH_PPC64LE)

// 128-bit VSX kernels; see ojph_simd_vsx.h.
// The SIMD block decoder is used everywhere on POWER10 (ISA 3.1),
// where it beats the scalar decoder on all measured content. On
// POWER9 it wins for irreversible content (more magnitude bits
// per sample) but trails the scalar decoder slightly on
// reversible content, so it is dispatched only for the former.
if (get_cpu_ext_level() >= PPC_CPU_EXT_LEVEL_ARCH_3_1 ||
(!reversible &&
get_cpu_ext_level() >= PPC_CPU_EXT_LEVEL_ARCH_3_00))
decode_cb32 = ojph_decode_codeblock_vsx;
if (get_cpu_ext_level() >= PPC_CPU_EXT_LEVEL_ARCH_3_00) {
find_max_val32 = vsx_find_max_val32;
mem_clear = vsx_mem_clear;
if (reversible) {
tx_to_cb32 = vsx_rev_tx_to_cb32;
tx_from_cb32 = vsx_rev_tx_from_cb32;
}
else {
tx_to_cb32 = vsx_irv_tx_to_cb32;
tx_from_cb32 = vsx_irv_tx_from_cb32;
}
find_max_val64 = vsx_find_max_val64;
if (reversible) {
tx_to_cb64 = vsx_rev_tx_to_cb64;
tx_from_cb64 = vsx_rev_tx_from_cb64;
}
else {
tx_to_cb64 = NULL;
tx_from_cb64 = gen_irv_tx_from_cb64;
}
}

#endif // !(defined(OJPH_ARCH_X86_64) || defined(OJPH_ARCH_I386))

#endif // !OJPH_DISABLE_SIMD
Expand Down
8 changes: 4 additions & 4 deletions src/core/codestream/ojph_codestream_local.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -640,7 +640,7 @@ namespace ojph {
this->pre_alloc();
this->finalize_alloc();

ui16 t = swap_bytes_if_le(JP2K_MARKER::SOC);
ui16 t = swap_bytes_if_le((ui16)JP2K_MARKER::SOC);
if (file->write(&t, 2) != 2)
OJPH_ERROR(0x00030022, "Error writing to file");

Expand Down Expand Up @@ -670,7 +670,7 @@ namespace ojph {
OJPH_INT_TO_STRING(OPENJPH_VERSION_MINOR) "."
OJPH_INT_TO_STRING(OPENJPH_VERSION_PATCH) ".";
size_t len = strlen(buf);
*(ui16*)buf = swap_bytes_if_le(JP2K_MARKER::COM);
*(ui16*)buf = swap_bytes_if_le((ui16)JP2K_MARKER::COM);
*(ui16*)(buf + 2) = swap_bytes_if_le((ui16)(len - 2));
//1 for General use (IS 8859-15:1999 (Latin) values)
*(ui16*)(buf + 4) = swap_bytes_if_le((ui16)(1));
Expand All @@ -680,7 +680,7 @@ namespace ojph {
if (comments != NULL) {
for (ui32 i = 0; i < num_comments; ++i)
{
t = swap_bytes_if_le(JP2K_MARKER::COM);
t = swap_bytes_if_le((ui16)JP2K_MARKER::COM);
if (file->write(&t, 2) != 2)
OJPH_ERROR(0x00030029, "Error writing to file");
t = swap_bytes_if_le((ui16)(comments[i].len + 4));
Expand Down Expand Up @@ -1151,7 +1151,7 @@ namespace ojph {
}
for (si32 i = 0; i < repeat; ++i)
tiles[i].flush(outfile);
ui16 t = swap_bytes_if_le(JP2K_MARKER::EOC);
ui16 t = swap_bytes_if_le((ui16)JP2K_MARKER::EOC);
if (!outfile->write(&t, 2))
OJPH_ERROR(0x00030071, "Error writing to file");
}
Expand Down
Loading
Loading