Source code for psyclone.psyir.transformations.omp_taskwait_trans

# -----------------------------------------------------------------------------
# BSD 3-Clause License
#
# Copyright (c) 2021-2024, Science and Technology Facilities Council.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
#   list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
#
# * Neither the name of the copyright holder nor the names of its
#   contributors may be used to endorse or promote products derived from
#   this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
# -----------------------------------------------------------------------------
# Author: A. B. G. Chalk, STFC Daresbury Lab
# Modified: R. W. Ford, N. Nobre and S. Siso, STFC Daresbury Lab

''' This module provides the OMPTaskwaitTrans transformation that can be
applied to an OMPParallelDirective to satisfy any task-based dependencies
created by OpenMP Taskloops.'''
from __future__ import absolute_import, print_function

from psyclone.core import VariablesAccessInfo
from psyclone.errors import LazyString, InternalError
from psyclone.psyGen import Transformation
from psyclone.psyir import nodes
from psyclone.psyir.nodes import Loop, Schedule, \
    OMPDoDirective, OMPTaskloopDirective, OMPSerialDirective, \
    OMPTaskwaitDirective, OMPSingleDirective, OMPParallelDirective
from psyclone.psyir.transformations.transformation_error import \
        TransformationError


[docs]class OMPTaskwaitTrans(Transformation): ''' Adds zero or more OpenMP Taskwait directives to an OMP parallel region. This transformation will add directives to satisfy dependencies between Taskloop directives without an associated taskgroup (i.e. no nogroup clause). It also tries to minimise the number added to maximise available parallelism. For example: >>> from pysclone.parse.algorithm import parse >>> from psyclone.psyGen import PSyFactory >>> api = "gocean1.0" >>> filename = "nemolite2d_alg.f90" >>> ast, invokeInfo = parse(filename, api=api, invoke_name="invoke") >>> psy = PSyFactory(api).create(invokeInfo) >>> >>> from psyclone.transformations import OMPParallelTrans, OMPSingleTrans >>> from psyclone.transformations import OMPTaskloopTrans >>> from psyclone.psyir.transformations import OMPTaskwaitTrans >>> singletrans = OMPSingleTrans() >>> paralleltrans = OMPParallelTrans() >>> tasklooptrans = OMPTaskloopTrans() >>> taskwaittrans = OMPTaskwaitTrans() >>> >>> schedule = psy.invokes.get('invoke_0').schedule >>> print(schedule.view()) >>> >>> # Apply the OpenMP Taskloop transformation to *every* loop >>> # in the schedule. >>> # This ignores loop dependencies. These are handled by the >>> # taskwait transformation. >>> for child in schedule.children: >>> tasklooptrans.apply(child, nogroup = true) >>> # Enclose all of these loops within a single OpenMP >>> # SINGLE region >>> singletrans.apply(schedule.children) >>> # Enclose all of these loops within a single OpenMP >>> # PARALLEL region >>> paralleltrans.apply(schedule.children) >>> taskwaittrans.apply(schedule.children) >>> print(schedule.view()) ''' def __str__(self): rval = ("Adds 'OpenMP TASKWAIT' directives to an OpenMP parallel " "region to satisfy 'OpenMP TASKLOOP' dependencies") return rval def validate(self, node, options=None): ''' Validity checks for input arguments. :param node: the OMPParallelDirective node to validate. :type node: :py:class:`psyclone.psyir.nodes.OMPParallelDirective` :param options: a dictionary with options for transformations. :type options: Optional[Dict[str, Any]] :param bool options["fail_on_no_taskloop"]: indicating whether this should throw an error if no \ OMPTaskloop nodes are found in this tree. This can be \ safely disabled as if there are no Taskloop nodes the \ result of this transformation is valid OpenMP code. Default \ is True. :raises TransformationError: If the supplied node is not an \ OMPParallelDirective :raises TransformationError: If there are no OMPTaskloopDirective \ nodes in this tree. :raises TransformationError: If taskloop dependencies can't be \ satisfied due to dependencies across \ barrierless OpenMP Serial Regions. ''' fail_on_no_taskloop = True if options is not None: fail_on_no_taskloop = options.get("fail_on_no_taskloop", True) # Check the supplied node is an OMPParallelDirective if not isinstance(node, nodes.OMPParallelDirective): raise TransformationError(f"OMPTaskwaitTrans was supplied a " f"'{node.__class__.__name__}'" f" node, but expected an " f"OMPParallelDirective") # Find all the taskloops taskloops = node.walk(OMPTaskloopDirective) # Walk the tree to find any OMPTaskloopTrans if fail_on_no_taskloop and taskloops == []: raise TransformationError("OMPTaskwaitTrans was supplied an " "OMPParallelDirective that does not " "contain any OMPTaskloopDirectives") # Check that all of the dependencies are satisfiable for taskloop in taskloops: # Find the first RaW or WaR dependency for this taskloop. forward_dep = OMPTaskwaitTrans.get_forward_dependence(taskloop, node) if forward_dep is None: continue # Check if the taskloop and its forward dependence are in the # same serial region if (taskloop.ancestor(OMPSerialDirective) is not forward_dep.ancestor(OMPSerialDirective)): # They're not in the same serial region. Check if our # taskloop is in a waiting Single Directive ancestor = taskloop.ancestor(OMPSerialDirective) valid = isinstance(ancestor, nodes.OMPSingleDirective) if valid: valid = not ancestor.nowait # If not valid, then we're in a Master Directive or a # Single Directive with nowait. In this case we can't # safely guarantee our forward dependency so throw an error if not valid: # pylint: disable=cell-var-from-loop # Since "Backslashes may not appear inside the expression # portions of f-strings" via PEP 498, use chr(10) for '\n' raise TransformationError(LazyString( lambda: f"Couldn't satisfy the dependencies due to " f"taskloop dependencies across barrierless " f"OMP serial regions. Dependency is from\n" f"{taskloop.debug_string().rstrip(chr(10))}" f"\nto\n" f"{forward_dep.debug_string().rstrip(chr(10))}" )) @staticmethod def get_forward_dependence(taskloop, root): ''' Returns the next forward dependence for a taskloop using the dependence-analysis functionality provided by psyclone.psyir.tools.dependency_tools. Forward dependencies can be of the following types: Loop OMPDoDirective OMPTaskloopDirective OMPTaskwaitDirective (If in same OMPSingle and that single has nowait=False) OMPSingleDirective (If ancestor OMPSingle has nowait=False) Loop, OMPDoDirective, OMPTaskloopDirective types are returned when a following directive is found which has a RaW, WaR or WaW dependency to taskloop. An OMPTaskwaitDirective type is returned when a following directive is found inside the same parent OMPSingleDirective which has no nowait clause applied. An OMPSingleDirective type is returned when the first dependency is within a different OMPSerialDirective, and the ancestor of taskloop is an OMPSingleDirective with no nowait clause. The forward dependency is never a child of taskloop, and must have abs_position > taskloop.abs_position :param taskloop: the taskloop node for which to find the \ forward_dependence. :type taskloop: :py:class:`psyclone.psyir.nodes.OMPTaskloopDirective` :param root: the tree in which to search for the forward_dependence. :type root: :py:class:`psyclone.psyir.nodes.OMPParallelDirective` :returns: the forward_dependence of taskloop. :rtype: :py:class:`psyclone.f2pygen.Node` ''' # Check supplied the correct type for root if not isinstance(root, OMPParallelDirective): raise TransformationError(f"Expected the root of the tree in which" f" to look for a forward dependence to " f"be an instance of OMPParallelDirective" f", but was supplied an instance of " f"'{type(root).__name__}'") # We only look for specific types node_list = root.walk((Loop, OMPDoDirective, OMPTaskloopDirective, OMPTaskwaitDirective)) # Find the taskloop's variable access info. We need to skip over the # Loop variable writes from the Loop, so we skip the Loop children. taskloop_vars = VariablesAccessInfo() for child in taskloop.walk(nodes.Node): if child is not taskloop and not isinstance(child, (Schedule, Loop)): taskloop_vars.merge(VariablesAccessInfo(child)) taskloop_signatures = taskloop_vars.all_signatures # Find our parent serial region if it has a barrier parent_single = taskloop.ancestor(OMPSingleDirective) # Cache the parent single for use in later if statements cached_parent_single = parent_single # If the parent single region has a nowait clause it can never # act as the dependency for this taskloop, so we set it to None. # The same behaviour occurs implicitly if the parent # OMPSerialDirective is an OMPMasterDirective. Without a blocking # parent region we can never guarantee synchronicity if dependencies # are outside of the parent region. if parent_single is not None and parent_single.nowait: parent_single = None # Find our parent parallel region parent_parallel = taskloop.ancestor(OMPParallelDirective) # Raise an error if there is no parent_parallel region if parent_parallel is None: # Since "Backslashes may not appear inside the expression # portions of f-strings" via PEP 498, use chr(10) for '\n' raise InternalError(LazyString( lambda: f"No parent parallel directive was found for the " f"taskloop region: " f"{taskloop.debug_string().rstrip(chr(10))}")) for node in node_list: if node.abs_position <= taskloop.abs_position: continue # Ignore any children of the taskloop directive anc = node.ancestor(OMPTaskloopDirective) if anc is taskloop: continue node_vars = None if (isinstance(node, OMPTaskwaitDirective) and (cached_parent_single is node.ancestor(OMPSingleDirective)) and cached_parent_single is not None and cached_parent_single.nowait is False): # If we find a taskwait barrier inside the same # OMPSingleDirective and that OMPSingleDirective has a no # nowait clause, then it acts as a barrier for dependencies # as well, so we return it return node if not isinstance(node, OMPTaskwaitDirective): # For all our other node types we calculate their own # variable accesses node_vars = VariablesAccessInfo() for child in node.walk(nodes.Node): if child is not node and not isinstance(child, (Schedule, Loop)): refs = VariablesAccessInfo(child) if refs is not None: node_vars.merge(refs) node_signatures = node_vars.all_signatures # Once we have the node's variable accesses, check for collisions for sig1 in taskloop_signatures: # If this signature is not in the node signatures then continue if sig1 not in node_signatures: continue access1 = taskloop_vars[sig1] access2 = node_vars[sig1] # If both are only read we can ignore this signature # Otherwise, one of them writes so return this node as # we have a WaW, WaR or RaW dependency if access1.is_written() or access2.is_written(): # If we have a different parent serial node, and # parent_single is not None then our parent_single is our # dependency, otherwise this node is the dependency if (taskloop.ancestor(OMPSerialDirective) is not node.ancestor(OMPSerialDirective) and parent_single is not None): return parent_single return node # If we found no dependencies, then return that! return None @staticmethod def _eliminate_unneeded_dependencies(taskloop_positions, dependence_positions, dependence_nodes): ''' Eliminates unneeded dependencies from a set of taskloop positions, dependence_positions and dependence_nodes for a region of code. :param taskloop_positions: positions of the taskloops. :type taskloop_positions: list of int :param dependence_positions: positions of the taskloops' dependencies. :type dependence_positions: list of int :param dependence_nodes: the nodes respresenting the forward \ dependency of each taskloop node. :type dependence_nodes: list of :py:class:`psyclone.psyir.nodes.Node` :returns: the updated dependence_positions and dependence_nodes arrays :rval: 2-tuple of (list of int, list of :py:class:`psyclone.psyir.nodes.Node`) ''' # We loop over each dependence and perform the same operation: # For each dependence we loop over all other # taskloops whose position is after this taskloop, but before # this taskloop's forward dependency. If that dependence is # fulfilled by this dependence (i.e. # dependence_position[this] < dependence_position[that]) then # we remove that dependence. If this dependence is fulfilled # by that dependence (i.e. dependence_position[that] < # dependence_position[this]) then we remove this dependence. # This assumes that taskloop_positions is in ascending order, # which I believe to be true by construction. for i, dep_pos in enumerate(dependence_positions): # Corresponding taskloop has no dependency or was satisfied # by another dependency being resolved. if dep_pos is None: continue # Grab the position of the next dependence next_dependence = dep_pos # Loop over the other taskloops for j in range(i+1, len(dependence_positions)): # If this taskloop happens after the next_dependence # then we can just stop. if taskloop_positions[j] >= next_dependence: break # If the jth taskloop has no dependency then continue if dependence_positions[j] is None: continue # Check if next_dependence will satisfy the jth # taskloops dependency if next_dependence <= dependence_positions[j]: dependence_positions[j] = None dependence_nodes[j] = None continue # Check if the jth taskloop's dependence will satisfy # the next_dependence if dependence_positions[j] < next_dependence: dependence_positions[i] = None dependence_nodes[i] = None # If it does then we can move to the next taskloop break return dependence_positions, dependence_nodes
[docs] def apply(self, node, options=None): ''' Apply an OMPTaskwait Transformation to the supplied node (which must be an OMPParallelDirective). In the generated code this corresponds to adding zero or more OMPTaskwaitDirectives as appropriate: .. code-block:: fortran !$OMP PARALLEL ... !$OMP TASKWAIT ... !$OMP TASKWAIT ... !$OMP END PARALLEL :param node: the node to which to apply the transformation. :type node: :py:class:`psyclone.psyir.nodes.OMPParallelDirective` :param options: a dictionary with options for transformations\ and validation. :type options: Optional[Dict[str, Any]] :param bool options["fail_on_no_taskloop"]: indicating whether this should throw an error if no \ OMPTaskloop nodes are found in this tree. This can be \ safely disabled as if there are no Taskloop nodes the \ result of this transformation is valid OpenMP code. Default \ is True ''' self.validate(node, options=options) # Find all the OpenMP Single & Master regions task_regions = node.walk(OMPSerialDirective) # Loop over the task regions for task_region in task_regions: create_endtaskwait = False endwaits = [] # Find all of the taskloops taskloops = task_region.walk(OMPTaskloopDirective) # Get the positions of all of the taskloops taskloop_positions = [-1] * len(taskloops) # Get the forward_dependence position of all of the taskloops dependence_position = [None] * len(taskloops) dependence_node = [None] * len(taskloops) for i, taskloop in enumerate(taskloops): taskloop_positions[i] = taskloop.abs_position # Only set forward_dep for taskloops with nogroup set forward_dep = None if taskloop.nogroup: forward_dep = OMPTaskwaitTrans.get_forward_dependence( taskloop, node) # If the forward_dependence is one of our parents then we # should ignore it if (forward_dep is not None and forward_dep.abs_position < taskloop.abs_position): # If we're in a blocking single region and the # dependency for any of its tasks points to its # parent single region, then its "real" next dependency # is outside of the single region. To ensure we # synchronize before this dependency, we must have a # task synchronization construct before the spawning # thread leaves the single region. It is possible that # this dependency could be handled by another # intermediary taskwait, however I can't work out a # good way to detect that now (since there is no # "end of single region" abs_position to map to). # I could store a list of all the taskloops that # require this final taskwait, and walk from each one # to see if a taskwait has been placed to satisfy their # dependency, but that does not seem elegant somehow. if forward_dep is task_region: endwaits.append(taskloop) create_endtaskwait = True continue if forward_dep is None: continue # Check if the taskloop and its forward dependence are in the # same serial region if (taskloops[i].ancestor(OMPSerialDirective) is forward_dep.ancestor(OMPSerialDirective)): # We're in the same OMPSerialDirective so store the # position of the forward dependency dependence_position[i] = forward_dep.abs_position dependence_node[i] = forward_dep # Forward dependency positions are now computed for this region. dependence_position, dependence_node = \ OMPTaskwaitTrans._eliminate_unneeded_dependencies( taskloop_positions, dependence_position, dependence_node) # dependence_position now contains only the required dependencies # to satisfy the full superset of dependencies. We can loop over # these by index, and if dependence_position[i] is not None then # go to its forward dependency, and insert a TaskwaitDirective # immediately before. for i, dep_pos in enumerate(dependence_position): if dep_pos is not None: forward_dep = dependence_node[i] fdep_parent = forward_dep.parent # Find the position of the forward_dep in its parent's # children list loc = forward_dep.position # We've found the position, so we now insert an # OMPTaskwaitDirective in that location instead fdep_parent.addchild(OMPTaskwaitDirective(), loc) if create_endtaskwait: # For each taskloop that needed a taskwait at the end for taskloop in endwaits: taskloop_pos = taskloop.abs_position # Find all the taskwaits in the task_region node_list = task_region.walk(OMPTaskwaitDirective) # If every taskwait appears before this taskloop then # add a taskwait at the end of the serial region and # stop. if all(node1.abs_position < taskloop_pos for node1 in node_list): task_region.dir_body.addchild(OMPTaskwaitDirective()) break