Source code for psyclone.psyir.transformations.acc_kernels_trans

# -----------------------------------------------------------------------------
# BSD 3-Clause License
#
# Copyright (c) 2017-2025, Science and Technology Facilities Council.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
#   list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
#
# * Neither the name of the copyright holder nor the names of its
#   contributors may be used to endorse or promote products derived from
#   this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
# -----------------------------------------------------------------------------
# Authors R. W. Ford, A. R. Porter, S. Siso and N. Nobre, STFC Daresbury Lab
#         A. B. G. Chalk STFC Daresbury Lab
#         J. Henrichs, Bureau of Meteorology
# Modified I. Kavcic, J. G. Wallwork, O. Brunt and L. Turner, Met Office
#          S. Valat, Inria / Laboratoire Jean Kuntzmann
#          M. Schreiber, Univ. Grenoble Alpes / Inria / Lab. Jean Kuntzmann

''' This module provides the ACCKernelsTrans transformation. '''

import re
from typing import Any, Dict, List, Union

from psyclone import psyGen
from psyclone.psyir.nodes.acc_mixins import ACCAsyncMixin
from psyclone.psyir.nodes import (
    ACCEnterDataDirective, ACCKernelsDirective, Assignment,
    Call, CodeBlock, Literal, Loop, Node,
    PSyDataNode, Reference, Return, Routine, Statement, WhileLoop)
from psyclone.psyir.symbols import INTEGER_TYPE, UnsupportedFortranType
from psyclone.psyir.transformations.arrayassignment2loops_trans import (
    ArrayAssignment2LoopsTrans)
from psyclone.psyir.transformations.region_trans import RegionTrans
from psyclone.psyir.transformations.transformation_error import (
    TransformationError)


[docs] class ACCKernelsTrans(RegionTrans): ''' Enclose a sub-set of nodes from a Schedule within an OpenACC kernels region (i.e. within "!$acc kernels" ... "!$acc end kernels" directives). For example: >>> from psyclone.psyir.frontend import FortranReader >>> psyir = FortranReader().psyir_from_source(NEMO_SOURCE_FILE) >>> >>> from psyclone.psyir.transformations import ACCKernelsTrans >>> ktrans = ACCKernelsTrans() >>> >>> schedule = psyir.children[0] >>> # Uncomment the following line to see a text view of the schedule >>> # print(schedule.view()) >>> kernels = schedule.children[9] >>> # Transform the kernel >>> ktrans.apply(kernels) ''' excluded_node_types = (CodeBlock, Return, PSyDataNode, psyGen.HaloExchange, WhileLoop)
[docs] def apply( self, node: Union[Node, List[Node]], options: Dict[str, Any] = {} ): ''' Enclose the supplied list of PSyIR nodes within an OpenACC Kernels region. :param node: a node or list of nodes in the PSyIR to enclose. :param options: a dictionary with options for transformations. :param bool options["default_present"]: whether or not the kernels region should have the 'default present' attribute (indicating that data is already on the accelerator). When using managed memory this option should be False. :param bool options["disable_loop_check"]: whether to disable the check that the supplied region contains 1 or more loops. Default is False (i.e. the check is enabled). :param options["async_queue"]: whether or not to add the 'async' clause to the new directive and if so, which queue to associate it with. True to enable for the default queue or a queue value specified with an int or PSyIR expression. :type options["async_queue"]: Union[bool, :py:class:`psyclone.psyir.nodes.DataNode`] :param bool options["allow_string"]: whether to allow the transformation on assignments involving character types. Defaults to False. :param bool options["verbose"]: log the reason the validation failed, at the moment with a comment in the provided PSyIR node. ''' # Ensure we are always working with a list of nodes, even if only # one was supplied via the `node` argument. node_list = self.get_node_list(node) self.validate(node_list, options) parent = node_list[0].parent start_index = node_list[0].position default_present = options.get("default_present", False) async_queue = options.get("async_queue", False) # Create a directive containing the nodes in node_list and insert it. directive = ACCKernelsDirective( parent=parent, children=[node.detach() for node in node_list], default_present=default_present, async_queue=async_queue) parent.children.insert(start_index, directive)
[docs] @staticmethod def check_async_queue( nodes: List[Node], async_queue: Union[bool, int, Reference] ): ''' Common function to check that all parent data directives have the same async queue. :param node: the nodes in the PSyIR to enclose. :param async_queue: The async queue to expect in ancestors. :raises TypeError: if the supplied queue is of the wrong type. :raises TransformationError: if the supplied queue does not match that specified by any ancestor nodes. ''' def _to_str(val): return (f"'{val.debug_string()}'" if isinstance(val, Node) else "None") if async_queue is False: # The kernels directive will not have the async clause. return # check type (a bool is an instance of int) and ensure the supplied # value is in a form suitable for comparison with values already # stored in the PSyIR. if isinstance(async_queue, bool): # A value of True means that async is specified with no queue. checkval = None elif isinstance(async_queue, int): checkval = Literal(f"{async_queue}", INTEGER_TYPE) elif isinstance(async_queue, Reference): checkval = async_queue else: raise TypeError(f"Invalid async_queue value, expect Reference or " f"integer or None or bool, got : {async_queue}") # Perform an additional check whether a queue has been used before. # Note this to work only for the current routine. parent = nodes[0].ancestor(ACCAsyncMixin) if parent is not None: if checkval != parent.async_queue: raise TransformationError( f"Cannot apply ACCKernelsTrans with asynchronous " f"queue {_to_str(checkval)} because a parent directive " f"specifies queue {_to_str(parent.async_queue)}") parent = nodes[0].ancestor(Routine) if parent: edata = parent.walk(ACCEnterDataDirective) if edata: if checkval != edata[0].async_queue: raise TransformationError( f"Cannot apply ACCKernelsTrans with asynchronous queue" f" {_to_str(checkval)} because the containing routine " f"has an ENTER DATA directive specifying queue " f"{_to_str(edata[0].async_queue)}")
[docs] def validate( self, nodes: Union[Node, List[Node]], options: Dict[str, Any] = {} ) -> None: # pylint: disable=signature-differs ''' Check that we can safely enclose the supplied node or list of nodes within OpenACC kernels ... end kernels directives. :param nodes: the proposed PSyIR node or nodes to enclose in the kernels region. :param options: a dictionary with options for transformations. :param bool options["default_present"]: whether or not the kernels region should have the 'default present' attribute (indicating that data is already on the accelerator). When using managed memory this option should be False. :param bool options["disable_loop_check"]: whether to disable the check that the supplied region contains 1 or more loops. Default is False (i.e. the check is enabled). :param options["async_queue"]: whether or not to add the 'async' clause to the new directive and if so, which queue to associate it with. True to enable for the default queue or a queue value specified with an int or PSyIR expression. :type options["async_queue"]: Union[bool, :py:class:`psyclone.psyir.nodes.DataNode`] :param bool options["allow_string"]: whether to allow the transformation on assignments involving character types. Defaults to False. :param bool options["verbose"]: log the reason the validation failed, at the moment with a comment in the provided PSyIR node. :raises NotImplementedError: if the supplied Nodes belong to a GOInvokeSchedule. :raises TransformationError: if there is an access to an assumed-size character variable within the region. :raises TransformationError: if the proposed region contains a call to a routine that is not available on the accelerator. :raises TransformationError: if there are no Loops within the proposed region and options["disable_loop_check"] is not True. :raises TransformationError: if any assignments in the region contain a character type child and options["allow_string"] is not True. ''' if not options: options = {} # Ensure we are always working with a list of nodes, even if only # one was supplied via the `nodes` argument. node_list = self.get_node_list(nodes) # Check that the front-end is valid # pylint: disable-next=import-outside-toplevel from psyclone.gocean1p0 import GOInvokeSchedule if node_list[0].ancestor(GOInvokeSchedule): raise NotImplementedError( "OpenACC kernels regions are not currently supported for " "GOcean InvokeSchedules") super().validate(node_list, options) # The regex we use to determine whether a character declaration is # of assumed size ('LEN=*' or '*(*)'). # TODO #2612 - improve the fparser2 frontend support for character # declarations. assumed_size = re.compile(r"\(\s*len\s*=\s*\*\s*\)|\*\s*\(\s*\*\s*\)") # Construct a list of any symbols that correspond to assumed-size # character strings. These can only be routine arguments. char_syms = [] parent_routine = node_list[0].ancestor(Routine) if parent_routine: arg_syms = parent_routine.symbol_table.argument_datasymbols for sym in arg_syms: # Currently the fparser2 frontend does not support any type # of LEN= specification on a character variable so we resort # to a regex to check whether it is assumed-size. if isinstance(sym.datatype, UnsupportedFortranType): type_txt = sym.datatype.type_text.lower() if (type_txt.startswith("character") and assumed_size.search(type_txt)): char_syms.append(sym) for node in node_list: # Check that there are no assumed-size character variables as these # cause an Internal Compiler Error with (at least) NVHPC <= 24.5. for ref in node.walk(Reference): if ref.symbol in char_syms: stmt = ref.ancestor(Statement) raise TransformationError( f"Assumed-size character variables cannot be enclosed " f"in an OpenACC region but found " f"'{stmt.debug_string()}'") # Check there are no character assignments in the region as these # cause various problems with (at least) NVHPC <= 24.5 if not options.get("allow_string", False): message = ( f"{self.name} does not permit assignments involving " f"character variables by default (use the 'allow_string' " f"option to include them)") for assign in node.walk(Assignment): ArrayAssignment2LoopsTrans.validate_no_char( assign, message, options) # Check that any called routines are supported on the device. for icall in node.walk(Call): if not icall.is_available_on_device(): raise TransformationError( f"Cannot include '{icall.debug_string()}' in an " f"OpenACC region because it is not available on GPU.") # extract async option and check validity async_queue = options.get('async_queue', False) self.check_async_queue(node_list, async_queue) # Check that we have at least one loop or array range within # the proposed region unless this has been disabled. if options and options.get("disable_loop_check", False): return for node in node_list: if (any(assign for assign in node.walk(Assignment) if assign.is_array_assignment) or node.walk(Loop)): break else: # Branch executed if loop does not exit with a break raise TransformationError( "A kernels transformation must enclose at least one loop or " "array range but none were found.")