Source code for psyclone.psyir.transformations.acc_kernels_trans

# -----------------------------------------------------------------------------
# BSD 3-Clause License
#
# Copyright (c) 2017-2026, Science and Technology Facilities Council.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
#   list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
#
# * Neither the name of the copyright holder nor the names of its
#   contributors may be used to endorse or promote products derived from
#   this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
# -----------------------------------------------------------------------------
# Authors R. W. Ford, A. R. Porter, S. Siso and N. Nobre, STFC Daresbury Lab
#         A. B. G. Chalk STFC Daresbury Lab
#         J. Henrichs, Bureau of Meteorology
# Modified I. Kavcic, J. G. Wallwork, O. Brunt and L. Turner, Met Office
#          S. Valat, Inria / Laboratoire Jean Kuntzmann
#          M. Schreiber, Univ. Grenoble Alpes / Inria / Lab. Jean Kuntzmann

''' This module provides the ACCKernelsTrans transformation. '''

from typing import Any, Dict, Union
import warnings

from psyclone import psyGen
from psyclone.psyir.nodes.acc_mixins import ACCAsyncMixin
from psyclone.psyir.nodes import (
    ACCEnterDataDirective, ACCKernelsDirective, Assignment,
    Call, CodeBlock, Literal, Loop, Node,
    PSyDataNode, Reference, Return, Routine, Statement, WhileLoop)
from psyclone.psyir.symbols import (
    ArrayType, DataTypeSymbol, ScalarType, UnsupportedFortranType)
from psyclone.psyir.transformations.arrayassignment2loops_trans import (
    ArrayAssignment2LoopsTrans)
from psyclone.psyir.transformations.region_trans import RegionTrans
from psyclone.psyir.transformations.transformation_error import (
    TransformationError)
from psyclone.utils import transformation_documentation_wrapper


@transformation_documentation_wrapper
class ACCKernelsTrans(RegionTrans):
    '''
    Enclose a sub-set of nodes from a Schedule within an OpenACC kernels
    region (i.e. within "!$acc kernels" ... "!$acc end kernels" directives).

    For example:

    >>> from psyclone.psyir.frontend import FortranReader
    >>> psyir = FortranReader().psyir_from_source(NEMO_SOURCE_FILE)
    >>>
    >>> from psyclone.psyir.transformations import ACCKernelsTrans
    >>> ktrans = ACCKernelsTrans()
    >>>
    >>> schedule = psyir.children[0]
    >>> # Uncomment the following line to see a text view of the schedule
    >>> # print(schedule.view())
    >>> kernels = schedule.children[9]
    >>> # Transform the kernel
    >>> ktrans.apply(kernels)

    '''
    # Node types that are handed to the parent RegionTrans as not being
    # permitted within the region.
    excluded_node_types = (CodeBlock, Return, PSyDataNode,
                           psyGen.HaloExchange, WhileLoop)

    def apply(
        self,
        node: Union[Node, list[Node]],
        options: Union[Dict[str, Any], None] = None,
        default_present: bool = False,
        disable_loop_check: bool = False,
        async_queue: Union[bool, Reference, int] = False,
        allow_strings: bool = False,
        verbose: bool = False,
        **kwargs
    ):
        '''
        Enclose the supplied list of PSyIR nodes within an OpenACC
        Kernels region.

        :param node: a node or list of nodes in the PSyIR to enclose.
        :param options: a dictionary with options for transformations.
        :param default_present: whether or not the kernels region should
            have the 'default present' attribute (indicating that data is
            already on the accelerator). When using managed memory this
            option should be False.
        :param disable_loop_check: whether to disable the check that the
            supplied region contains 1 or more loops. Default is False
            (i.e. the check is enabled).
        :param async_queue: whether or not to add the 'async' clause to the
            new directive and if so, which queue to associate it with. True
            to enable for the default queue or a queue value specified
            with an int or PSyIR expression.
        :param allow_strings: whether to allow the transformation on
            assignments involving character types. Defaults to False.
        :param verbose: log the reason the validation failed, at the moment
            with a comment in the provided PSyIR node.

        '''
        # Ensure we are always working with a list of nodes, even if only
        # one was supplied via the `node` argument.
        node_list = self.get_node_list(node)

        # A non-empty options dictionary takes precedence over the
        # corresponding keyword arguments.
        if options:
            default_present = options.get("default_present", False)
            async_queue = options.get("async_queue", False)

        self.validate(
            node_list,
            options=options,
            default_present=default_present,
            disable_loop_check=disable_loop_check,
            async_queue=async_queue,
            allow_strings=allow_strings,
            verbose=verbose,
            **kwargs
        )

        # Record where the region starts before the nodes are detached.
        parent = node_list[0].parent
        start_index = node_list[0].position

        # Create a directive containing the nodes in node_list and insert it.
        directive = ACCKernelsDirective(
            parent=parent,
            children=[node.detach() for node in node_list],
            default_present=default_present,
            async_queue=async_queue)

        parent.children.insert(start_index, directive)

    @staticmethod
    def check_async_queue(
        nodes: list[Node],
        async_queue: Union[bool, int, Reference]
    ):
        '''
        Common function to check that all parent data directives have
        the same async queue.

        :param nodes: the nodes in the PSyIR to enclose.
        :param async_queue: The async queue to expect in ancestors.

        :raises TypeError: if the supplied queue is of the wrong type.
        :raises TransformationError: if the supplied queue does not match
            that specified by any ancestor nodes.

        '''
        def _to_str(val):
            # Render a queue value for use in an error message.
            return (f"'{val.debug_string()}'" if isinstance(val, Node)
                    else "None")

        if async_queue is False:
            # The kernels directive will not have the async clause.
            return

        # check type (a bool is an instance of int) and ensure the supplied
        # value is in a form suitable for comparison with values already
        # stored in the PSyIR.
        if isinstance(async_queue, bool):
            # A value of True means that async is specified with no queue.
            checkval = None
        elif isinstance(async_queue, int):
            checkval = Literal(f"{async_queue}", ScalarType.integer_type())
        elif isinstance(async_queue, Reference):
            checkval = async_queue
        else:
            raise TypeError(f"Invalid async_queue value, expect Reference or "
                            f"integer or None or bool, got : {async_queue}")

        # Perform an additional check whether a queue has been used before.
        # Note that this only works for the current routine.
        parent = nodes[0].ancestor(ACCAsyncMixin)
        if parent is not None:
            if checkval != parent.async_queue:
                raise TransformationError(
                    f"Cannot apply ACCKernelsTrans with asynchronous "
                    f"queue {_to_str(checkval)} because a parent directive "
                    f"specifies queue {_to_str(parent.async_queue)}")

        parent = nodes[0].ancestor(Routine)
        if parent:
            edata = parent.walk(ACCEnterDataDirective)
            if edata:
                # Only the first ENTER DATA directive found is compared.
                if checkval != edata[0].async_queue:
                    raise TransformationError(
                        f"Cannot apply ACCKernelsTrans with asynchronous queue"
                        f" {_to_str(checkval)} because the containing routine "
                        f"has an ENTER DATA directive specifying queue "
                        f"{_to_str(edata[0].async_queue)}")

    def validate(
        self,
        nodes: Union[Node, list[Node]],
        options: Union[Dict[str, Any], None] = None,
        **kwargs
    ) -> None:
        # pylint: disable=signature-differs
        '''
        Check that we can safely enclose the supplied node or list of nodes
        within OpenACC kernels ... end kernels directives.

        :param nodes: the proposed PSyIR node or nodes to enclose in the
            kernels region.
        :param options: a dictionary with options for transformations.

        :raises NotImplementedError: if the supplied Nodes belong to
            a GOInvokeSchedule.
        :raises TransformationError: if there is an access to an
            assumed-size character variable within the region.
        :raises TransformationError: if the proposed region contains a call
            to a routine that is not available on the accelerator.
        :raises TransformationError: if there are no Loops within the
            proposed region and options["disable_loop_check"] is not True.
        :raises TransformationError: if any assignments in the region
            contain a character type child and options["allow_strings"] is
            not True.

        '''
        # Use a fresh dictionary per call rather than a shared mutable
        # default, so that the parent class is still handed a dict.
        if options is None:
            options = {}

        if options:
            # TODO #2668: Deprecate options dictionary.
            warnings.warn(self._deprecation_warning, DeprecationWarning, 2)
            allow_strings = options.get("allow_strings", False)
            async_queue = options.get("async_queue", False)
            disable_loop_check = options.get("disable_loop_check", False)
            verbose = options.get("verbose", False)
        else:
            self.validate_options(**kwargs)
            allow_strings = self.get_option("allow_strings", **kwargs)
            async_queue = self.get_option("async_queue", **kwargs)
            disable_loop_check = self.get_option("disable_loop_check",
                                                 **kwargs)
            verbose = self.get_option("verbose", **kwargs)

        # Ensure we are always working with a list of nodes, even if only
        # one was supplied via the `nodes` argument.
        node_list = self.get_node_list(nodes)

        # Check that the front-end is valid
        # pylint: disable-next=import-outside-toplevel
        from psyclone.gocean1p0 import GOInvokeSchedule
        if node_list[0].ancestor(GOInvokeSchedule):
            raise NotImplementedError(
                "OpenACC kernels regions are not currently supported for "
                "GOcean InvokeSchedules")

        super().validate(node_list, options, **kwargs)

        # Construct a list of any symbols that correspond to assumed-size
        # character strings. These can only be routine arguments.
        char_syms = []
        parent_routine = node_list[0].ancestor(Routine)
        if parent_routine:
            arg_syms = parent_routine.symbol_table.argument_datasymbols
            for sym in arg_syms:
                dtype = sym.datatype
                if isinstance(dtype, UnsupportedFortranType):
                    dtype = dtype.partial_datatype
                if not dtype:
                    continue
                if isinstance(dtype, DataTypeSymbol):
                    continue
                if dtype.intrinsic != ScalarType.Intrinsic.CHARACTER:
                    continue
                if isinstance(dtype, ArrayType):
                    dtype = dtype.elemental_type
                if isinstance(dtype.length, ScalarType.CharLengthParameter):
                    char_syms.append(sym)

        for node in node_list:
            # Check that there are no assumed-size character variables as
            # these cause an Internal Compiler Error with (at least)
            # NVHPC <= 24.5.
            for ref in node.walk(Reference):
                if ref.symbol in char_syms:
                    stmt = ref.ancestor(Statement)
                    raise TransformationError(
                        f"Assumed-size character variables cannot be enclosed "
                        f"in an OpenACC region but found "
                        f"'{stmt.debug_string()}'")

            # Check there are no character assignments in the region as these
            # cause various problems with (at least) NVHPC <= 24.5
            if not allow_strings:
                message = (
                    f"{self.name} does not permit assignments involving "
                    f"character variables by default (use the 'allow_strings' "
                    f"option to include them)")
                for assign in node.walk(Assignment):
                    ArrayAssignment2LoopsTrans.validate_no_char(
                        assign, message, verbose)

            # Check that any called routines are supported on the device.
            for icall in node.walk(Call):
                if not icall.is_available_on_device():
                    raise TransformationError(
                        f"Cannot include '{icall.debug_string()}' in an "
                        f"OpenACC region because it is not available on GPU.")

        # Check the validity of the supplied async option (if any).
        self.check_async_queue(node_list, async_queue)

        # Check that we have at least one loop or array range within
        # the proposed region unless this has been disabled.
        if disable_loop_check:
            return

        for node in node_list:
            if (any(assign for assign in node.walk(Assignment)
                    if assign.is_array_assignment) or node.walk(Loop)):
                break
        else:
            # Branch executed if loop does not exit with a break
            raise TransformationError(
                "A kernels transformation must enclose at least one loop or "
                "array range but none were found.")