# -----------------------------------------------------------------------------
# BSD 3-Clause License
#
# Copyright (c) 2017-2026, Science and Technology Facilities Council.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# * Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
# -----------------------------------------------------------------------------
# Authors R. W. Ford, A. R. Porter, S. Siso and N. Nobre, STFC Daresbury Lab
# A. B. G. Chalk STFC Daresbury Lab
# J. Henrichs, Bureau of Meteorology
# Modified I. Kavcic, J. G. Wallwork, O. Brunt and L. Turner, Met Office
# S. Valat, Inria / Laboratoire Jean Kuntzmann
# M. Schreiber, Univ. Grenoble Alpes / Inria / Lab. Jean Kuntzmann
''' This module provides the ACCKernelsTrans transformation. '''
from typing import Any, Dict, Union
import warnings
from psyclone import psyGen
from psyclone.psyir.nodes.acc_mixins import ACCAsyncMixin
from psyclone.psyir.nodes import (
ACCEnterDataDirective, ACCKernelsDirective, Assignment,
Call, CodeBlock, Literal, Loop, Node,
PSyDataNode, Reference, Return, Routine, Statement, WhileLoop)
from psyclone.psyir.symbols import (
ArrayType, DataTypeSymbol, ScalarType, UnsupportedFortranType)
from psyclone.psyir.transformations.arrayassignment2loops_trans import (
ArrayAssignment2LoopsTrans)
from psyclone.psyir.transformations.region_trans import RegionTrans
from psyclone.psyir.transformations.transformation_error import (
TransformationError)
from psyclone.utils import transformation_documentation_wrapper
@transformation_documentation_wrapper
class ACCKernelsTrans(RegionTrans):
    '''
    Enclose a sub-set of nodes from a Schedule within an OpenACC kernels
    region (i.e. within "!$acc kernels" ... "!$acc end kernels" directives).

    For example:

    >>> from psyclone.psyir.frontend import FortranReader
    >>> psyir = FortranReader().psyir_from_source(NEMO_SOURCE_FILE)
    >>>
    >>> from psyclone.psyir.transformations import ACCKernelsTrans
    >>> ktrans = ACCKernelsTrans()
    >>>
    >>> schedule = psyir.children[0]
    >>> # Uncomment the following line to see a text view of the schedule
    >>> # print(schedule.view())
    >>> kernels = schedule.children[9]
    >>> # Transform the kernel
    >>> ktrans.apply(kernels)

    '''
    # Node types that must not appear inside the region.
    # NOTE(review): presumably consulted by RegionTrans.validate() - confirm.
    excluded_node_types = (CodeBlock, Return, PSyDataNode,
                           psyGen.HaloExchange, WhileLoop)

    def apply(
        self,
        node: Union[Node, list[Node]],
        options: Union[Dict[str, Any], None] = None,
        default_present: bool = False,
        disable_loop_check: bool = False,
        async_queue: Union[bool, Reference, int] = False,
        allow_strings: bool = False,
        verbose: bool = False,
        **kwargs
    ):
        '''
        Enclose the supplied list of PSyIR nodes within an OpenACC
        Kernels region.

        The supplied nodes are detached from their parent and re-attached
        as children of a new ACCKernelsDirective which is inserted at the
        position previously occupied by the first node.

        If a non-empty `options` dictionary is supplied then its
        "default_present" and "async_queue" entries (each defaulting to
        False when absent) take precedence over the keyword arguments of
        the same name.

        :param node: a node or list of nodes in the PSyIR to enclose.
        :param options: a dictionary with options for transformations.
        :param default_present: whether or not the kernels
            region should have the 'default present' attribute (indicating
            that data is already on the accelerator). When using managed
            memory this option should be False.
        :param disable_loop_check: whether to disable the check
            that the supplied region contains 1 or more loops. Default is
            False (i.e. the check is enabled).
        :param async_queue: whether or not to add the 'async' clause
            to the new directive and if so, which queue to associate it
            with. True to enable for the default queue or a queue value
            specified with an int or PSyIR expression.
        :param allow_strings: whether to allow the
            transformation on assignments involving character types.
            Defaults to False.
        :param verbose: log the reason the validation failed,
            at the moment with a comment in the provided PSyIR node.

        '''
        # Use a fresh dictionary per call rather than a shared mutable
        # default argument.
        if options is None:
            options = {}

        # Ensure we are always working with a list of nodes, even if only
        # one was supplied via the `node` argument.
        node_list = self.get_node_list(node)

        if options:
            default_present = options.get("default_present", False)
            async_queue = options.get("async_queue", False)

        self.validate(
            node_list, options=options,
            default_present=default_present,
            disable_loop_check=disable_loop_check,
            async_queue=async_queue,
            allow_strings=allow_strings,
            verbose=verbose,
            **kwargs
        )

        # Record where the region starts *before* detaching any nodes.
        parent = node_list[0].parent
        start_index = node_list[0].position

        # Create a directive containing the nodes in node_list and insert it.
        directive = ACCKernelsDirective(
            parent=parent,
            children=[child.detach() for child in node_list],
            default_present=default_present, async_queue=async_queue)

        parent.children.insert(start_index, directive)

    @staticmethod
    def check_async_queue(
        nodes: list[Node],
        async_queue: Union[bool, int, Reference]
    ):
        '''
        Common function to check that all parent data directives have
        the same async queue.

        :param nodes: the nodes in the PSyIR to enclose.
        :param async_queue: The async queue to expect in ancestors.

        :raises TypeError: if the supplied queue is of the wrong type.
        :raises TransformationError: if the supplied queue does not match
            that specified by any ancestor nodes.

        '''
        def _to_str(val):
            # Textual form of a queue value for use in error messages.
            return (f"'{val.debug_string()}'" if isinstance(val, Node)
                    else "None")

        if async_queue is False:
            # The kernels directive will not have the async clause.
            return

        # check type (a bool is an instance of int) and ensure the supplied
        # value is in a form suitable for comparison with values already
        # stored in the PSyIR.
        if isinstance(async_queue, bool):
            # A value of True means that async is specified with no queue.
            checkval = None
        elif isinstance(async_queue, int):
            checkval = Literal(f"{async_queue}", ScalarType.integer_type())
        elif isinstance(async_queue, Reference):
            checkval = async_queue
        else:
            raise TypeError(f"Invalid async_queue value, expect Reference or "
                            f"integer or None or bool, got : {async_queue}")

        # Perform an additional check on whether a queue has been used
        # before. Note that this only works for the current routine.
        parent = nodes[0].ancestor(ACCAsyncMixin)
        if parent is not None:
            if checkval != parent.async_queue:
                raise TransformationError(
                    f"Cannot apply ACCKernelsTrans with asynchronous "
                    f"queue {_to_str(checkval)} because a parent directive "
                    f"specifies queue {_to_str(parent.async_queue)}")

        parent = nodes[0].ancestor(Routine)
        if parent:
            edata = parent.walk(ACCEnterDataDirective)
            if edata:
                # Only the first ENTER DATA directive found is compared.
                if checkval != edata[0].async_queue:
                    raise TransformationError(
                        f"Cannot apply ACCKernelsTrans with asynchronous queue"
                        f" {_to_str(checkval)} because the containing routine "
                        f"has an ENTER DATA directive specifying queue "
                        f"{_to_str(edata[0].async_queue)}")

    def validate(
        self,
        nodes: Union[Node, list[Node]],
        options: Union[Dict[str, Any], None] = None,
        **kwargs
    ) -> None:
        # pylint: disable=signature-differs
        '''
        Check that we can safely enclose the supplied node or list of nodes
        within OpenACC kernels ... end kernels directives.

        If a non-empty `options` dictionary is supplied then a
        DeprecationWarning is issued and the "allow_strings", "async_queue",
        "disable_loop_check" and "verbose" settings are read from it.
        Otherwise they are obtained from the keyword arguments.

        :param nodes: the proposed PSyIR node or nodes to enclose in the
            kernels region.
        :param options: a dictionary with options for transformations.

        :raises NotImplementedError: if the supplied Nodes belong to
            a GOInvokeSchedule.
        :raises TransformationError: if there is an access to an assumed-size
            character variable within the region.
        :raises TransformationError: if the proposed region contains a call to
            a routine that is not available on the accelerator.
        :raises TransformationError: if there are no Loops within the
            proposed region and options["disable_loop_check"] is not True.
        :raises TransformationError: if any assignments in the region contain a
            character type child and options["allow_strings"] is not True.

        '''
        # Use a fresh dictionary per call rather than a shared mutable
        # default argument.
        if options is None:
            options = {}

        if options:
            # TODO #2668: Deprecate options dictionary.
            warnings.warn(self._deprecation_warning, DeprecationWarning, 2)
            allow_strings = options.get("allow_strings", False)
            async_queue = options.get("async_queue", False)
            disable_loop_check = options.get("disable_loop_check", False)
            verbose = options.get("verbose", False)
        else:
            self.validate_options(**kwargs)
            allow_strings = self.get_option("allow_strings", **kwargs)
            async_queue = self.get_option("async_queue", **kwargs)
            disable_loop_check = self.get_option("disable_loop_check",
                                                 **kwargs)
            verbose = self.get_option("verbose", **kwargs)

        # Ensure we are always working with a list of nodes, even if only
        # one was supplied via the `nodes` argument.
        node_list = self.get_node_list(nodes)

        # Check that the front-end is valid
        # pylint: disable-next=import-outside-toplevel
        from psyclone.gocean1p0 import GOInvokeSchedule
        if node_list[0].ancestor(GOInvokeSchedule):
            raise NotImplementedError(
                "OpenACC kernels regions are not currently supported for "
                "GOcean InvokeSchedules")

        super().validate(node_list, options, **kwargs)

        # Construct a list of any symbols that correspond to assumed-size
        # character strings. These can only be routine arguments.
        char_syms = []
        parent_routine = node_list[0].ancestor(Routine)
        if parent_routine:
            arg_syms = parent_routine.symbol_table.argument_datasymbols
            for sym in arg_syms:
                dtype = sym.datatype
                if isinstance(dtype, UnsupportedFortranType):
                    # Fall back on whatever type information is available.
                    dtype = dtype.partial_datatype
                    if not dtype:
                        continue
                if isinstance(dtype, DataTypeSymbol):
                    continue
                if dtype.intrinsic != ScalarType.Intrinsic.CHARACTER:
                    continue
                if isinstance(dtype, ArrayType):
                    dtype = dtype.elemental_type
                if isinstance(dtype.length, ScalarType.CharLengthParameter):
                    char_syms.append(sym)

        for node in node_list:
            # Check that there are no assumed-size character variables as these
            # cause an Internal Compiler Error with (at least) NVHPC <= 24.5.
            for ref in node.walk(Reference):
                if ref.symbol in char_syms:
                    stmt = ref.ancestor(Statement)
                    raise TransformationError(
                        f"Assumed-size character variables cannot be enclosed "
                        f"in an OpenACC region but found "
                        f"'{stmt.debug_string()}'")

            # Check there are no character assignments in the region as these
            # cause various problems with (at least) NVHPC <= 24.5
            if not allow_strings:
                message = (
                    f"{self.name} does not permit assignments involving "
                    f"character variables by default (use the 'allow_strings' "
                    f"option to include them)")
                for assign in node.walk(Assignment):
                    ArrayAssignment2LoopsTrans.validate_no_char(
                        assign, message, verbose)

            # Check that any called routines are supported on the device.
            for icall in node.walk(Call):
                if not icall.is_available_on_device():
                    raise TransformationError(
                        f"Cannot include '{icall.debug_string()}' in an "
                        f"OpenACC region because it is not available on GPU.")

        # Check the validity of the supplied async option (if any).
        self.check_async_queue(node_list, async_queue)

        # Check that we have at least one loop or array range within
        # the proposed region unless this has been disabled.
        if disable_loop_check:
            return

        for node in node_list:
            if (any(assign for assign in node.walk(Assignment)
                    if assign.is_array_assignment) or node.walk(Loop)):
                break
        else:
            # Branch executed if loop does not exit with a break
            raise TransformationError(
                "A kernels transformation must enclose at least one loop or "
                "array range but none were found.")