Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion .github/workflows/lfric_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -149,8 +149,13 @@ jobs:
rm -rf working-gh-ompoffload
export BUILD_START="${SECONDS}"
LFRIC_OFFLOAD_DIRECTIVES=omp ./build/local_build.py -v -j ${NUM_PARALLEL} -p psyclone-test \
-w working-gh-ompoffload gungho_model
-w working-gh-ompoffload gungho_model |& tee output.txt
# Piping to tee masks the exit code of the build command, so we must check it
# explicitly. PIPESTATUS is overwritten by every subsequent command (including
# the [[ ]] test below), so capture the build's status immediately after the
# pipeline and use the saved copy from then on.
build_status=${PIPESTATUS[0]}
if [[ ${build_status} -ne 0 ]]; then
  exit "${build_status}"
fi
export BUILD_ELAPSED=$((${SECONDS}-${BUILD_START}))
${PSYCLONE_LFRIC_DIR}/aggregate_gpu_stats.sh output.txt
cd applications/gungho_model/example
rm -f timer.txt gungho_model-checksums.txt # In case these were left over from a previous run
export OMP_NUM_THREADS=12
Expand Down
97 changes: 97 additions & 0 deletions examples/lfric/scripts/aggregate_gpu_stats.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
#!/usr/bin/env bash

# -----------------------------------------------------------------------------
# BSD 3-Clause License
#
# Copyright (c) 2018-2026, Science and Technology Facilities Council.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# * Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
# -----------------------------------------------------------------------------
# Authors: S. Siso, STFC Daresbury Lab

# Validate the command line: exactly one argument is expected, the path of a
# readable file whose contents will be scanned by the functions below.
if [[ $# -ne 1 ]]; then
    # Take the script name from $0 so the usage text always matches the name
    # the script was actually invoked with.
    echo "Usage: $(basename "$0") <filename>" >&2
    exit 1
fi
filename=$1
if [[ ! -r "$filename" ]]; then
    # Diagnostics go to stderr so they are not mixed into the report on stdout.
    echo "$filename does not exist or is not readable" >&2
    exit 1
fi


# Print "<text>: <count>", where <count> is the number of distinct lines of
# the file named by the global $filename that contain <text>.
#   $1 - the text to look for (matched literally, not as a regular expression).
count_uniq() {
    printf '%s: ' "$1"
    # -F matches the text as a fixed string and '--' stops a text starting
    # with '-' from being parsed as an option. "$filename" is quoted so that
    # paths containing whitespace or glob characters are passed through intact.
    grep -F -- "$1" "$filename" | sort -u | wc -l
}

# Terminate the script with exit status 1 unless the text $1 appears on at
# least $2 distinct lines of the file named by the global $filename.
#   $1 - the text to look for (matched literally, not as a regular expression).
#   $2 - the minimum acceptable number of distinct matching lines.
check_above() {
    # Keep the counter local so it cannot overwrite a variable of the caller.
    local value
    # -F / '--' / quoting: match the text literally and cope with file paths
    # that contain whitespace or glob characters.
    value=$(grep -F -- "$1" "$filename" | sort -u | wc -l)
    if [[ $value -lt $2 ]]; then
        # Report the failure on stderr, separated from earlier output by a
        # blank line.
        echo >&2
        echo "Error: Number of $1 is below $2" >&2
        exit 1
    fi
}

# Report section 1: how many distinct module-inline messages were logged,
# followed by the count of one specific failure message as a sub-item.
echo " --- First we need to be able to modify kernels ---"
for outcome in "Module-inline successful" "Module-inline failed"; do
    count_uniq "$outcome"
done
echo -n " -> "
count_uniq "because it accesses data from its outer scope"
echo

# Report section 2: inline / annotation outcomes, then a list of sub-items
# (each prefixed with " -> ") counting specific messages.
echo " --- Then we need to inline them, or fallback to GPU routine annotations ---"
for outcome in "Inline successful" "Inline failed" \
               "Annotation successful" "Annotation failed"; do
    count_uniq "$outcome"
done
for reason in "accesses the imported symbol" \
              "calls another routine" \
              "calls intrinsic" \
              "only supports the transformation of a MATMUL operation when"; do
    echo -n " -> "
    count_uniq "$reason"
done
echo

# Report section 3: counts for each kind of loop-offloading message.
echo " --- Then offload each loop with kernels inside ---"
for outcome in "Offload independent loop" \
               "Offload with dof loop" \
               "Offload with atomics" \
               "Offload with cell colouring" \
               "Offload with cell tile-colouring" \
               "Failed to offload" \
               "Added inner loop nested parallelism" \
               "Added OMP threading"; do
    count_uniq "$outcome"
done

# Finally enforce minimum counts: check_above exits with an error as soon as
# one of these messages occurs on fewer distinct lines than the given number.
check_above "Module-inline successful" 277
check_above "Offload independent loop" 88
check_above "Offload with cell colouring" 40
96 changes: 55 additions & 41 deletions examples/lfric/scripts/gpu_offloading.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,8 @@
# S. Siso, STFC Daresbury Lab
# L. Mosimann, NVIDIA.

'''PSyclone transformation script for LFRic to apply colouring and GPU
offloading. Also adds redundant computation to the level-1 halo for
setval_* generically.
'''PSyclone transformation script for LFRic to apply GPU offloading directives.
Also adds redundant computation to the level-1 halo for setval_* generically.

'''
import os
Expand All @@ -52,23 +51,25 @@
Call, Directive, IntrinsicCall, Loop, Routine, Schedule)
from psyclone.psyir.transformations import (
ACCKernelsTrans, Matmul2CodeTrans, OMPTargetTrans, TransformationError,
OMPDeclareTargetTrans, OMPParallelTrans, ACCLoopTrans)
OMPDeclareTargetTrans, OMPParallelTrans, InlineTrans)
from psyclone.transformations import (
LFRicColourTrans, LFRicOMPLoopTrans,
ACCParallelTrans, ACCRoutineTrans,
OMPLoopTrans)
from psyclone.psyir.transformations import ACCLoopTrans


# Names of any invoke that we won't add any GPU offloading
INVOKE_EXCLUSIONS = [
]
INVOKE_EXCLUSIONS = []

# We won't attempt to inline calls to routines with names that contain
# these strings (because they're not computationally important).
INLINE_EXCLUSIONS = ["abort", "logging"]

OFFLOAD_DIRECTIVES = os.getenv('LFRIC_OFFLOAD_DIRECTIVES', "none")

RESOLVE_IMPORTS = ['constants_mod']


def _replace_matmuls(sched: Schedule):
'''
Expand Down Expand Up @@ -98,8 +99,7 @@ def _replace_matmuls(sched: Schedule):
def trans(psyir):
'''Applies PSyclone colouring and GPU offloading transformations. Any
kernels that cannot be offloaded to GPU are parallelised using OpenMP
on the CPU. Any setval_* kernels are transformed so as to compute
into the L1 halos.
on the CPU.

:param psyir: the PSyIR of the PSy-layer.
:type psyir: :py:class:`psyclone.psyir.nodes.FileContainer`
Expand All @@ -111,6 +111,7 @@ def trans(psyir):
const = LFRicConstants()
cpu_parallel = OMPParallelTrans()
mod_inline_trans = KernelModuleInlineTrans()
inline_trans = InlineTrans()

if OFFLOAD_DIRECTIVES == "omp":
# Use OpenMP offloading
Expand Down Expand Up @@ -141,13 +142,13 @@ def trans(psyir):

print(f"Transforming invoke '{subroutine.name}' ...")

# Make setval_* compute redundantly to the level 1 halo if it
# Make setval_c compute redundantly to the level 1 halo if it
# is in its own loop and only if it has an iteration space that is
# *not* restricted to owned dofs.
for loop in subroutine.loops():
if loop.iteration_space == "dof":
if len(loop.kernels()) == 1:
if loop.kernels()[0].name in ["setval_c", "setval_x"]:
if loop.kernels()[0].name in ["setval_c"]:
rtrans.apply(loop, options={"depth": 1})

if psyir.name.lower() in INVOKE_EXCLUSIONS:
Expand All @@ -168,41 +169,52 @@ def trans(psyir):
const.VALID_DISCONTINUOUS_NAMES):
ctrans.apply(loop)

# Module-inline the Kernels inside the loops and then mark them as
# GPU-enabled.
# (The latter step won't be necessary if/when we fully inline them.)
# We need to module-inline all Kernels inside the loops and then either
# fully inline them or mark them with a GPU-enabling directive.
for loop in subroutine.loops():
if offload:
for kern in loop.kernels():
if isinstance(kern, LFRicBuiltIn):
# BuiltIns are replaced with inlined code when lowering
continue
# Attempt to module-inline the kernel.
try:
mod_inline_trans.apply(kern)
print(f"Module-inlined kernel '{kern.name}'")
except TransformationError as err:
failed_inline.add(kern.name.lower())
print(f"Failed to module-inline kernel "
f"'{kern.name}' due to:\n{err.value}")
for kern in loop.kernels():
if not offload or isinstance(kern, LFRicBuiltIn):
# BuiltIns and kernels inside excluded invokes do
# not need inlining/annotations.
continue

# Attempt to module-inline the kernel.
try:
mod_inline_trans.apply(kern)
print(f"Module-inline successful for kernel "
f"'{kern.name}'")
except TransformationError as err:
failed_inline.add(kern.name.lower())
print(f"Module-inline failed for kernel "
f"'{kern.name}' due to:\n{err.value}")

# Attempt to fully inline the kernel
try:
# Ensure any MATMULs within the kernel are also inlined
for routine in kern.get_callees():
_replace_matmuls(routine)

inline_trans.apply(kern)
print(f"Inline successful for kernel "
f"'{kern.name}'")
continue
except TransformationError as err:
failed_inline.add(kern.name.lower())
print(f"Inline failed for kernel "
f"'{kern.name}' due to:\n{err.value}")

# If it cannot be inlined, fallback to annotate the
# kernel with GPU routine directives.
try:
# Ensure any MATMULs within the kernel are
# replaced.
for routine in kern.get_callees():
_replace_matmuls(routine)
# Finally, annotate the kernel routine for GPU.
gpu_annotation_trans.apply(kern)
print(f"Annotated kernel '{kern.name}'")
print(f"Annotation successful for kernel "
f"'{kern.name}'")
except TransformationError as err:
failed_to_offload.add(kern.name.lower())
print(f"Failed to annotate '{kern.name}' with "
f"GPU-enabled directive due to:\n"
f"{err.value}")
# For annotated/inlined kernels we could attempt to
# provide compile-time dimensions for temporary arrays
# and convert to code any unsupported intrinsics.

# Add GPU offloading to loops unless they are over colours or are null.
print(f"Annotation failed for kernel '{kern.name}' "
f"due to:\n{err.value}")

# Add GPU offloading to loops
for loop in subroutine.walk(Loop):
kernel_names = [k.name.lower() for k in loop.kernels()]
if offload and all(name not in failed_to_offload for name in
Expand All @@ -214,10 +226,12 @@ def trans(psyir):
loop_offloading_trans.apply(
loop, options={"independent": True})
gpu_region_trans.apply(loop.ancestor(Directive))
print(f"Offload with cell colouring: {kernel_names}")
if loop.loop_type == "":
loop_offloading_trans.apply(
loop, options={"independent": True})
gpu_region_trans.apply(loop.ancestor(Directive))
print(f"Offload independent loop: {kernel_names}")
if loop.loop_type == "dof":
# Loops over dofs can contain reductions
if kernels_trans:
Expand All @@ -231,9 +245,9 @@ def trans(psyir):
loop_offloading_trans.apply(
loop, options={"independent": True})
gpu_region_trans.apply(loop.ancestor(Directive))
print(f"Offload with dof loop: {kernel_names}")
# Alternatively we could use loop parallelism with
# reduction clauses
print(f"Successfully offloaded loop with {kernel_names}")
except TransformationError as err:
print(f"Failed to offload loop with {kernel_names} "
f"because: {err}")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -136,8 +136,8 @@ def validate_it_can_run_on_gpu(self, node, options, **kwargs):
# An import of a compile-time constant is fine.
continue
raise TransformationError(
f"{k_or_r} '{node.name}' accesses the symbol "
f"'{symbol}' which is imported. If this symbol "
f"{k_or_r} '{node.name}' accesses the imported symbol "
f"'{symbol}'. If this symbol "
f"represents data then it must first be converted to a"
f" {k_or_r} argument using the "
f"KernelImportsToArguments transformation.")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1488,9 +1488,9 @@ def test_accroutinetrans_module_use():
rtrans = ACCRoutineTrans()
with pytest.raises(TransformationError) as err:
rtrans.apply(kernels[0])
assert ("accesses the symbol 'magic: Symbol<Import(container='model_mod'"
")>' which is imported. If this symbol "
"represents data then it must first" in str(err.value))
assert ("accesses the imported symbol 'magic: Symbol<Import(container="
"'model_mod')>'. If this symbol represents data then it must first"
in str(err.value))
# Tell the ModuleManager where to find the module that is being USED by
# the kernel.
mod_man = ModuleManager.get()
Expand All @@ -1500,9 +1500,9 @@ def test_accroutinetrans_module_use():
with pytest.raises(TransformationError) as err:
rtrans.apply(kernels[0])
assert ("Transformation Error: Kernel 'kernel_with_use_code' accesses "
"the symbol 'magic: DataSymbol<Scalar<REAL, Reference"
"[name:'go_wp']>, Import(container='model_mod')>' which is "
"imported. If this symbol represents data then it must first be "
"the imported symbol 'magic: DataSymbol<Scalar<REAL, Reference"
"[name:'go_wp']>, Import(container='model_mod')>'. "
"If this symbol represents data then it must first be "
"converted to a Kernel argument using the "
"KernelImportsToArguments transformation." in str(err.value))

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -176,9 +176,9 @@ def test_gpumixin_validate_no_import(fortran_reader):
rtrans = ACCRoutineTrans()
with pytest.raises(TransformationError) as err:
rtrans.validate(routine)
assert ("Transformation Error: routine 'my_sub' accesses the symbol "
"'some_data: Symbol<Import(container='other_mod')>' which is "
"imported. If this symbol represents data "
assert ("Transformation Error: routine 'my_sub' accesses the imported "
"symbol 'some_data: Symbol<Import(container='other_mod')>'. "
"If this symbol represents data "
"then it must first be converted to a routine argument using the "
"KernelImportsToArguments transformation." in str(err.value))
# Specialise the imported symbol and make it constant.
Expand Down Expand Up @@ -239,8 +239,8 @@ def test_gpumixin_validate_no_cblock(fortran_reader):
routine = psyir.walk(Routine)[0]
with pytest.raises(TransformationError) as err:
rtrans.validate(routine, options={'force': True})
assert ("Transformation Error: routine 'my_sub' accesses the symbol "
"'some_data: Symbol<Import" in str(err.value))
assert ("Transformation Error: routine 'my_sub' accesses the imported "
"symbol 'some_data: Symbol<Import" in str(err.value))


def test_gpumixin_validate_no_call():
Expand Down
14 changes: 7 additions & 7 deletions src/psyclone/tests/psyir/transformations/transformations_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,7 @@ def __getitem__(self, _):
monkeypatch.setattr(routine, "reference_accesses", lambda: DummyVAM())
with pytest.raises(TransformationError) as err:
ompdeclaretargettrans.apply(routine)
assert "which is imported" in str(err.value)
assert "accesses the imported symbol" in str(err.value)


def test_omptaskloop_no_collapse():
Expand Down Expand Up @@ -429,9 +429,9 @@ def test_ompdeclaretargettrans_with_globals(sample_psyir, parser):
ref1.symbol.interface = ImportInterface(ContainerSymbol('my_mod'))
with pytest.raises(TransformationError) as err:
ompdeclaretargettrans.apply(routine)
assert ("routine 'my_subroutine' accesses the symbol 'a: DataSymbol<Array"
"<Scalar<INTEGER, UNDEFINED>, shape=[10, 10]>, "
"Import(container='my_mod')>' which is imported. If this symbol "
assert ("routine 'my_subroutine' accesses the imported symbol "
"'a: DataSymbol<Array<Scalar<INTEGER, UNDEFINED>, shape=[10, 10]>,"
" Import(container='my_mod')>'. If this symbol "
"represents data then it must first be converted to a routine "
"argument using the KernelImportsToArguments transformation."
in str(err.value))
Expand All @@ -448,9 +448,9 @@ def test_ompdeclaretargettrans_with_globals(sample_psyir, parser):
ref1.replace_with(block)
with pytest.raises(TransformationError) as err:
ompdeclaretargettrans.apply(routine)
assert ("routine 'my_subroutine' accesses the symbol 'a: DataSymbol<Array<"
"Scalar<INTEGER, UNDEFINED>, shape=[10, 10]>, "
"Import(container='my_mod')>' which is imported. If this symbol "
assert ("routine 'my_subroutine' accesses the imported symbol "
"'a: DataSymbol<Array<Scalar<INTEGER, UNDEFINED>, shape=[10, 10]>,"
" Import(container='my_mod')>'. If this symbol "
"represents data then it must first be converted to a routine "
"argument using the KernelImportsToArguments transformation."
in str(err.value))
Expand Down
Loading