Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
130 commits
Select commit Hold shift + click to select a range
3bbdef5
#2716 add initial fix and test [skip ci]
arporter Sep 17, 2024
459e26d
#2716 fix linting
arporter Sep 17, 2024
37188bf
#2716 rm check for polymorphic kernels, ensure renamed kern is public
arporter Sep 18, 2024
4aa1c0c
#2716 fix linting
arporter Sep 18, 2024
dd29ca3
Merge branch 'master' into 2716_transform_interface_bug
arporter Sep 27, 2024
4383c7c
#2716 WIP exploring options
arporter Sep 27, 2024
60f4add
Merge branch 'master' into 2716_transform_interface_bug
arporter Oct 2, 2024
62b5da5
#2716 WIP plumbing-in inlining of multiple kernel routines
arporter Oct 2, 2024
5a10b88
#2716 more fixes [skip ci]
arporter Oct 2, 2024
d217661
Merge branch 'master' into 2716_transform_interface_bug
arporter Oct 3, 2024
cc657a9
#2716 get KernelModuleInlineTrans tests working [skip ci]
arporter Oct 3, 2024
08f18c8
#2716 fix linting
arporter Oct 3, 2024
53147c6
#2716 more linting
arporter Oct 3, 2024
4198608
#2716 fix a lot of tests
arporter Oct 3, 2024
e079988
#2716 fix remaining tests
arporter Oct 3, 2024
ca153a4
#2716 fix examples
arporter Oct 3, 2024
e35bcb2
Merge branch 'master' into 2716_transform_interface_bug
arporter Oct 3, 2024
71a2630
#2716 revert some unnecessary changes
arporter Oct 3, 2024
ba58c82
#2716 tidying and improving comments/docstrings
arporter Oct 4, 2024
2ac83b5
#2716 add tests for KernelModuleInlineTrans
arporter Oct 7, 2024
e5d5699
#2716 fix coverage of gocean_move_iteration_boundaries_inside
arporter Oct 7, 2024
f026e21
#2716 rm need for polymorphic checks for GOcean Kernels
arporter Oct 7, 2024
6925697
#2716 improve coverage
arporter Oct 7, 2024
0b6ce32
#2716 improve _rm_imported_symbol and only attempt to add interface s…
arporter Oct 8, 2024
a00f367
#2716 add InterfaceDeclGen to f2pygen
arporter Oct 8, 2024
2b82201
#2716 fixes for the transformation in LFRic
arporter Oct 8, 2024
b9987fc
Merge branch 'master' into 2716_transform_interface_bug
arporter Oct 8, 2024
0fd9464
#2716 fix tests broken by merge
arporter Oct 8, 2024
2e76d3a
#2716 update opt script in repo and fix OMPDeclareTargetTrans
arporter Oct 8, 2024
721ff5d
#2716 mark MATMUL as available on GPU
arporter Oct 8, 2024
8319e95
#2716 fix test for matmul on gpu
arporter Oct 8, 2024
33376ff
#2716 ensure Kern points to inlined PSyIR after transformation [skip ci]
arporter Oct 9, 2024
e8b3c0b
#2716 improvements to validation of calls that resolve to multiple ro…
arporter Oct 10, 2024
0903b0a
#2716 add new inlining test
arporter Oct 10, 2024
a8d357d
#2716 add new test source file
arporter Oct 10, 2024
48bac41
#2716 return early if PSyKAl kernel already module inlined
arporter Oct 11, 2024
df74591
Merge branch 'master' into 2716_transform_interface_bug
arporter Oct 11, 2024
c26b8cc
#2716 improve apply() so that it returns early if routine already inl…
arporter Oct 11, 2024
4568467
#2716 update lfric inlining example (eg2)
arporter Oct 14, 2024
98daf23
Merge branch 'master' into 2716_transform_interface_bug
arporter Nov 4, 2024
bcb18ea
Merge branch 'master' into 2716_transform_interface_bug
arporter Nov 7, 2024
f11dbc9
Merge branch 'master' into 2716_transform_interface_bug
arporter Nov 7, 2024
e91a4e7
Merge branch 'master' into 2716_transform_interface_bug
arporter Nov 14, 2024
dbdec4f
#2716 tidying after merge
arporter Nov 14, 2024
a0ac65a
Merge branch 'master' into 2716_transform_interface_bug
arporter Nov 18, 2024
62b53d6
#2716 add RANDOM_NUMBER to intrinsics available on device
arporter Nov 18, 2024
8b025db
#2716 rename Kern._kern_schedule to plural and tidy
arporter Nov 18, 2024
3d5985b
Merge branch 'master' into 2716_transform_interface_bug
arporter Nov 19, 2024
7cdb373
#2716 add xfail for failure to compile case where only one kernel is …
arporter Nov 19, 2024
d18730f
#2716 improve xfailing test to be more specific
arporter Nov 20, 2024
029e647
Merge branch 'master' into 2716_transform_interface_bug
arporter Nov 21, 2024
be9d61a
#2716 improve comment
arporter Nov 22, 2024
25a3bc6
Merge branch 'master' into 2716_transform_interface_bug
arporter Nov 25, 2024
7dce555
Merge branch 'master' into 2716_transform_interface_bug
arporter Nov 27, 2024
b37f41a
Merge branch 'master' into 2716_transform_interface_bug
arporter Nov 28, 2024
ee64d54
#2732 mv fix for interface symbols into new lfric_psy.py file
arporter Nov 28, 2024
5289791
#2716 extend get_callees() to allow for private routines
arporter Nov 28, 2024
ce6f6a9
#2716 fix linting
arporter Nov 28, 2024
54b9a7f
#2716 experiment with recursive inlining
arporter Nov 28, 2024
c739468
#2716 fix bug in get_callees() when no Container is created
arporter Nov 29, 2024
622e015
#2716 improve robustness of Matmul2CodeTrans.validate()
arporter Nov 29, 2024
6f45c36
#2716 add Matmul2CodeTrans to gpu_offloading.py
arporter Nov 29, 2024
5fdf36d
#2716 fixes to KernelModuleInlineTrans to allow for previously-inline…
arporter Nov 29, 2024
b602078
Merge branch 'master' into 2716_transform_interface_bug [skip ci]
arporter Dec 2, 2024
bc31ad7
#2716 tidy after merge
arporter Dec 2, 2024
6b1af69
#2716 fix test failures after merge
arporter Dec 2, 2024
f1dc447
#2716 tidy updated gpu_offloading.py to fix linting errors
arporter Dec 10, 2024
5c72806
Merge branch 'master' into 2716_transform_interface_bug
arporter Dec 10, 2024
dfe80ce
Merge branch 'master' into 2716_transform_interface_bug
arporter Jan 7, 2025
7f3d155
Merge branch 'master' into 2716_transform_interface_bug
arporter Mar 13, 2025
953931a
#2716 fix merging errors
arporter Mar 13, 2025
49b4850
#2716 improve symbol search in validate() to allow for nested scopes
arporter Mar 13, 2025
d7177a2
#2716 fix error in error message
arporter Mar 13, 2025
f2759bf
Merge branch '2845_inline_symbol_bug' into 2716_update_inlining
arporter Mar 13, 2025
421f20d
#2716 WIP fixing tests [skip ci]
arporter Mar 14, 2025
ba4a2f2
#2716 fix linting [skip ci]
arporter Mar 14, 2025
7192011
#2716 WIP fixing merge [skip ci]
arporter Mar 14, 2025
1bcd8d7
#2716 fix KernelModuleInlineTrans tests [skip ci]
arporter Mar 14, 2025
770ced2
#2716 fix all tests
arporter Mar 14, 2025
1c9bf81
#2716 fix linting
arporter Mar 14, 2025
d8cac6f
#2716 fix all tests
arporter Mar 14, 2025
c1ff7b6
Merge branch 'master' into 2716_transform_interface_bug
arporter Apr 9, 2025
294aea6
#2716 WIP fixing merged files [skip ci]
arporter Apr 9, 2025
af91edc
#2716 finish fixing tests
arporter Apr 10, 2025
c431bc9
#2716 fix cov for Call and LFRicKern
arporter Apr 10, 2025
adf1258
#2716 get full cov of Container
arporter Apr 10, 2025
16978af
#2716 rm __all__ from Call because of doc warning
arporter Apr 10, 2025
3318f2e
#2716 allow for missing symbol_table in Routine.name setter
arporter Apr 10, 2025
d84b829
#2716 update KernelModuleInlineTrans so that all calls of a given Cod…
arporter Apr 14, 2025
fa787a2
#2716 allow for interface symbol when updating calls after mod-inlining
arporter Apr 15, 2025
6b4f9ce
Merge branch 'master' into 2716_transform_interface_bug
arporter Apr 15, 2025
27b1e3a
#2716 update gpu_offloading script to exclude MATMULs
arporter Apr 23, 2025
f00d72b
Merge branch 'master' into 2716_transform_interface_bug
arporter Apr 23, 2025
7e8f40a
#2716 tidy comment [skip ci]
arporter Apr 23, 2025
1f9a88c
#2716 rm unused code
arporter Apr 23, 2025
82dfcb8
#2716 fix lfric_kern coverage
arporter Apr 23, 2025
7834fb0
#2716 ensure inserted interface symbol is private
arporter Apr 24, 2025
4d1e02d
#2716 rm un-needed check from LFRicKern
arporter Apr 24, 2025
3bf5d3b
Merge branch 'master' into 2716_transform_interface_bug
arporter May 28, 2025
9c8615d
#2716 fix tests after merge
arporter May 28, 2025
fa0491b
#2719 change get_kernel_schedule() to only return Schedules
arporter May 28, 2025
68e56bd
#2716 refactor _get_psyir_to_inline
arporter May 28, 2025
fedbb7d
#2716 fix bug in get_callees
arporter May 28, 2025
96b438c
#2716 update examples
arporter May 28, 2025
4929ac3
#2716 fix typing capitalisation for 3.8
arporter May 29, 2025
5a85145
#2716 fix missed lines
arporter May 29, 2025
35ca767
#2716 improve unit coverage
arporter May 29, 2025
5df5646
Merge branch 'master' into 2716_transform_interface_bug
arporter Jun 3, 2025
76471fa
#2716 fix after merge
arporter Jun 3, 2025
486d7e5
Merge branch 'master' into 2716_transform_interface_bug
arporter Jun 9, 2025
bb97eba
#2716 rename get_kernel_schedule to get_callees [skip ci]
arporter Jun 11, 2025
4fc98e5
#2716 fix incorrect renaming
arporter Jun 13, 2025
508438c
Merge branch 'master' into 2716_transform_interface_bug
arporter Jun 13, 2025
852ab72
#2716 simplify gpu_offloading script [skip ci]
arporter Jun 13, 2025
eefdb9c
Merge branch 'master' into 2716_transform_interface_bug
arporter Jun 19, 2025
15c6d56
#2716 tidy kernel_print and extend example 19 to exercise it
arporter Jun 19, 2025
49b4410
#2716 tidy and remove attempts to inline from gpu_offloading.py
arporter Jun 19, 2025
393b6cd
#2716 add support for interface symbol renamed on import
arporter Jun 20, 2025
71c1c13
#2719 rename to get_callees() in examples
arporter Jun 20, 2025
84522ef
#2716 add support for renaming of non-polymorphic routine
arporter Jun 20, 2025
b4a02a3
#2716 update Dev Guide with info on get_callees()
arporter Jun 20, 2025
a185081
#2716 mv check on whether mod-inlining required to validate()
arporter Jun 23, 2025
655e871
#2716 rm MATMUL from list of intrinsics on device
arporter Jun 23, 2025
1858ea0
#2716 tidying for review
arporter Jun 23, 2025
f95cb1a
#2716 fix linting
arporter Jun 23, 2025
8abadc3
#2716 fix linting of examples
arporter Jun 23, 2025
58ae8c9
#2716 fix test for MATMUL available on device
arporter Jun 23, 2025
2ee067b
Replace get_kernel_schedule
sergisiso Jun 30, 2025
9338b7e
Bring to master
sergisiso Jun 30, 2025
00c36fd
#2732 Update changelog
sergisiso Jun 30, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions changelog
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
43) PR #2732 for #2716. Add support for module-inlining polymorphic kernels
and rename get_kernel_schedule to get_callees (to match the Call method).

42) PR #3034 for #2837. Updates the supported versions of Python used in
the test suite to 3.10 and 3.13.

Expand Down
10 changes: 5 additions & 5 deletions doc/developer_guide/transformations.rst
Original file line number Diff line number Diff line change
Expand Up @@ -51,13 +51,13 @@ Kernel Transformations
PSyclone is able to perform kernel transformations by obtaining the PSyIR
representation of the kernel with:

.. automethod:: psyclone.psyGen.CodedKern.get_kernel_schedule
.. automethod:: psyclone.psyGen.CodedKern.get_callees
:no-index:

The result of `psyclone.psyGen.Kern.get_kernel_schedule` is a
`psyclone.psyir.nodes.KernelSchedule` which is a specialisation of the
`Routine` class with the `is_program` and `return_type` properties set to
`False` and `None`, respectively.
The result of `psyclone.psyGen.Kern.get_callees` is a list of
`psyclone.psyir.nodes.KernelSchedule` objects. `KernelSchedule` is a
specialisation of the `Routine` class with the `is_program` and `return_type`
properties set to `False` and `None`, respectively.

In addition to modifying the kernel PSyIR with the desired transformations,
the `modified` flag of the `CodedKern` node has to be set. This will let
Expand Down
13 changes: 8 additions & 5 deletions examples/gocean/eg3/ocl_trans.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,10 @@
the first Invoke to use OpenCL. '''

from psyclone.psyGen import InvokeSchedule
from psyclone.psyir.transformations import \
FoldConditionalReturnExpressionsTrans
from psyclone.domain.gocean.transformations import GOOpenCLTrans, \
GOMoveIterationBoundariesInsideKernelTrans
from psyclone.psyir.transformations import (
FoldConditionalReturnExpressionsTrans)
from psyclone.domain.gocean.transformations import (
GOOpenCLTrans, GOMoveIterationBoundariesInsideKernelTrans)


def trans(psyir):
Expand All @@ -62,7 +62,10 @@ def trans(psyir):
move_boundaries_trans.apply(kern)
# Change the syntax to remove the return statements introduced by the
# previous transformation
fold_trans.apply(kern.get_kernel_schedule())
kschedules = kern.get_callees()
# NOTE: we assume the kernel is not polymorphic and thus there is
# only one schedule associated with it.
fold_trans.apply(kschedules[0])
# Specify the OpenCL queue and workgroup size of the kernel
# In this case we dispatch each kernel in a different queue to check
# that the output code has the necessary barriers to guarantee the
Expand Down
5 changes: 4 additions & 1 deletion examples/lfric/eg15/matvec_opt.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,10 @@ def trans(psyir):

for kernel in psyir.coded_kernels():
if kernel.name.lower() == "matrix_vector_kernel_code":
kernel_schedule = kernel.get_kernel_schedule()
kernel_schedules = kernel.get_callees()
# For simplicity, ASSUME that the kernel is not polymorphic and
# thus only has one schedule.
kernel_schedule = kernel_schedules[0]
# Replace matmul with inline code
for icall in kernel_schedule.walk(IntrinsicCall):
if icall.intrinsic is IntrinsicCall.Intrinsic.MATMUL:
Expand Down
5 changes: 4 additions & 1 deletion examples/lfric/eg19/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,11 @@ $(EXEC): $(LFRIC_LIB) $(OBJ)

compile: transform $(EXEC)

# Runs PSyclone to do the code generation. Also demonstrates the use of the
# example 'kernel_print' transformation which prints the Fortran of each
# kernel found.
transform:
${PSYCLONE} -api lfric algorithm.x90 -opsy mixed_precision_psy.f90 -oalg alg.f90
${PSYCLONE} -api lfric algorithm.x90 -s ../scripts/kernel_print.py -opsy mixed_precision_psy.f90 -oalg alg.f90

alg.f90 mixed_precision_psy.f90: transform
alg.o: mixed_precision_psy.o
Expand Down
61 changes: 50 additions & 11 deletions examples/lfric/scripts/gpu_offloading.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,10 @@
import sys
from psyclone.domain.common.transformations import KernelModuleInlineTrans
from psyclone.domain.lfric import LFRicConstants
from psyclone.psyir.nodes import Directive, Loop, Routine
from psyclone.psyir.nodes import (
Call, Directive, IntrinsicCall, Loop, Routine, Schedule)
from psyclone.psyir.transformations import (
ACCKernelsTrans, TransformationError, OMPTargetTrans)
ACCKernelsTrans, Matmul2CodeTrans, OMPTargetTrans, TransformationError)
from psyclone.transformations import (
LFRicColourTrans, LFRicOMPLoopTrans,
LFRicRedundantComputationTrans, OMPParallelTrans,
Expand All @@ -59,9 +60,38 @@
INVOKE_EXCLUSIONS = [
]

# We won't attempt to inline calls to routines with names that contain
# these strings (because they're not computationally important).
INLINE_EXCLUSIONS = ["abort", "logging"]

OFFLOAD_DIRECTIVES = os.getenv('LFRIC_OFFLOAD_DIRECTIVES', "none")


def _replace_matmuls(sched: Schedule) -> None:
    '''
    Attempts to replace every MATMUL intrinsic call found within the
    supplied schedule with equivalent inline code.

    :param sched: schedule to transform.

    :raises TransformationError: if a MATMUL cannot be replaced by \
        generic code. This is deliberately not caught here so that the \
        calling routine knows not to mark the kernel for offload.

    '''
    matrans = Matmul2CodeTrans()

    # Only intrinsic calls can be MATMULs, so walk those directly rather
    # than visiting every Call and filtering on type afterwards.
    for icall in sched.walk(IntrinsicCall):
        if icall.intrinsic is not IntrinsicCall.Intrinsic.MATMUL:
            continue
        # The NVIDIA compiler (as at 25.3) will sometimes fail to compile
        # code with calls to MATMUL with a claim that they are not
        # available on the device, e.g.:
        #     Call to NVHPC runtime function not supported -
        #     pgf90_matmul_real4_i8
        # Therefore, if we are unable to replace a MATMUL by generic code,
        # the resulting TransformationError will signal (to the calling
        # routine) that we are not to mark this kernel for offload.
        matrans.apply(icall)


def trans(psyir):
'''Applies PSyclone colouring and GPU offloading transformations. Any
kernels that cannot be offloaded to GPU are parallelised using OpenMP
Expand Down Expand Up @@ -106,7 +136,7 @@ def trans(psyir):

for subroutine in psyir.walk(Routine):

print("Transforming invoke '{0}' ...".format(subroutine.name))
print(f"Transforming invoke '{subroutine.name}' ...")

# Make setval_* compute redundantly to the level 1 halo if it
# is in its own loop
Expand All @@ -122,7 +152,8 @@ def trans(psyir):
else:
offload = True

# Keep a record of any kernels we fail to offload
# Keep a record of any kernels we fail to offload.
failed_inline = set()
failed_to_offload = set()

# Colour loops over cells unless they are on discontinuous spaces
Expand All @@ -133,29 +164,37 @@ def trans(psyir):
const.VALID_DISCONTINUOUS_NAMES):
ctrans.apply(loop)

# Mark Kernels inside the loops over cells as GPU-enabled
# (alternatively we could inline them)
# Module-inline the Kernels inside the loops over cells and then mark
# them as GPU-enabled.
# (The latter step won't be necessary if/when we fully inline them.)
for loop in subroutine.loops():
if loop.iteration_space.endswith("cell_column"):
if offload:
for kern in loop.kernels():
# Attempt to module-inline the kernel.
try:
mod_inline_trans.apply(kern)
print(f"Module-inlined kernel '{kern.name}'")
except TransformationError as err:
print(f"Failed to module-inline '{kern.name}' due "
f"to:\n{err.value}")
failed_inline.add(kern.name.lower())
print(f"Failed to module-inline kernel "
f"'{kern.name}' due to:\n{err.value}")
try:
# Ensure any MATMULs within the kernel are
# replaced.
for routine in kern.get_callees():
_replace_matmuls(routine)
# Finally, annotate the kernel routine for GPU.
gpu_annotation_trans.apply(kern)
print(f"Annotated kernel '{kern.name}'")
except TransformationError as err:
failed_to_offload.add(kern.name.lower())
print(f"Failed to annotate '{kern.name}' with "
f"GPU-enabled directive due to:\n"
f"{err.value}")
# For annotated or inlined kernels we could attempt to
# provide compile-time dimensions for the temporary
# arrays and convert to code unsupported intrinsics.
# For annotated/inlined kernels we could attempt to
# provide compile-time dimensions for temporary arrays
# and convert to code any unsupported intrinsics.

# Add GPU offloading to loops unless they are over colours or are null.
for loop in subroutine.walk(Loop):
Expand Down
12 changes: 6 additions & 6 deletions examples/lfric/scripts/kernel_print.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,11 +55,11 @@ def trans(psyir):
# Loop over all of the Kernels Calls
for kernel in psyir.coded_kernels():
try:
kernel_schedule = kernel.get_kernel_schedule()
if kernel_schedule not in already_printed:
kern = fortran_writer(kernel_schedule)
print(kern)
already_printed.append(kernel_schedule)
for ksched in kernel.get_callees():
if ksched not in already_printed:
kern = fortran_writer(ksched)
print(kern)
already_printed.append(ksched)
except Exception as err: # pylint: disable=broad-except
print(f"Code of '{kernel.name}' in "
print(f"Code of '{kernel.name}' "
f"cannot be printed because:\n{err}")
4 changes: 3 additions & 1 deletion examples/xdsl/backend/xdsl.py
Original file line number Diff line number Diff line change
Expand Up @@ -439,7 +439,9 @@ def checkIfStringIsType(self, string, typ):

def nemokern_node(self, node):
exec_statements = []
schedule = node.get_kernel_schedule()
schedules = node.get_callees()
# IGNORE polymorphic routines.
schedule = schedules[0]
for child in schedule.children:
exec_statements.append(self._visit(child))
return exec_statements
Expand Down
Loading
Loading