Source code for psyclone.domain.lfric.lfric_loop

# -----------------------------------------------------------------------------
# BSD 3-Clause License
#
# Copyright (c) 2017-2026, Science and Technology Facilities Council.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
#   list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
#
# * Neither the name of the copyright holder nor the names of its
#   contributors may be used to endorse or promote products derived from
#   this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
# -----------------------------------------------------------------------------
# Authors R. W. Ford, A. R. Porter and S. Siso, STFC Daresbury Lab
# Modified I. Kavcic, A. Coughtrie, L. Turner and O. Brunt, Met Office
# Modified J. Henrichs, Bureau of Meteorology
# Modified A. B. G. Chalk and N. Nobre, STFC Daresbury Lab

''' This module implements the PSyclone LFRic API by specialising the PSyLoop
    base class from psyclone.domain.common.psylayer.
    '''

from psyclone.configuration import Config
from psyclone.core import AccessType
from psyclone.domain.common.psylayer import PSyLoop
from psyclone.domain.lfric import LFRicConstants
from psyclone.domain.lfric.lfric_kern import LFRicKern
from psyclone.domain.lfric.lfric_types import LFRicTypes
from psyclone.errors import GenerationError, InternalError
from psyclone.psyGen import InvokeSchedule, HaloExchange
from psyclone.psyir.nodes import (
    Loop, Literal, Schedule, Reference, ArrayReference, StructureReference,
    Call, BinaryOperation, ArrayOfStructuresReference, Directive, DataNode,
    Node, Routine)
from psyclone.psyir.symbols import (
    AutomaticInterface, DataSymbol, ScalarType, UnresolvedType)



[docs]
class LFRicLoop(PSyLoop):
    '''
    The LFRic-specific PSyLoop class. This passes the LFRic-specific
    loop information to the base class so it creates the one
    we require.  Creates LFRic-specific loop bounds when the code is
    being generated.

    :param str loop_type: the type (iteration space) of this loop.
    :param kwargs: additional keyword arguments provided to the PSyIR node.
    :type kwargs: unwrapped dict.

    :raises InternalError: if an unrecognised loop_type is specified.
    :raises InternalError: if a parent that is descendant from an
        InvokeSchedule is not provided.

    '''

    # pylint: disable=too-many-instance-attributes
    def __init__(self, loop_type="", **kwargs):
        const = LFRicConstants()
        super().__init__(valid_loop_types=const.VALID_LOOP_TYPES, **kwargs)
        self.loop_type = loop_type

        # An LFRicLoop only makes sense inside an invoke, so insist on
        # having an InvokeSchedule ancestor straight away.
        invoke_sched = self.ancestor(InvokeSchedule)
        if not invoke_sched:
            raise InternalError(
                "LFRic loops must be inside an InvokeSchedule, a parent "
                "argument is mandatory when they are created.")

        # The loop variable is created now because other classes may ask
        # for it before code generation. A 'null' loop has no variable.
        kind = self.loop_type
        if kind != "null":
            # Maps each loop type to (symbol-table tag, suggested name).
            loop_variable_info = {
                "colours": ("colours_loop_idx", "colour"),
                "cells_in_colour": ("cell_loop_idx", "cell"),
                "dof": ("dof_loop_idx", "df"),
                "tiles_in_colour": ("tile_loop_idx", "tile"),
                "cells_in_tile": ("cell_loop_idx", "cell"),
                "": ("cell_loop_idx", "cell"),
            }
            tag_and_name = loop_variable_info.get(kind)
            if tag_and_name is None:
                raise InternalError(
                    f"Unsupported loop type '{self.loop_type}' found when "
                    f"creating loop variable. Supported values are: "
                    f"{const.VALID_LOOP_TYPES}")
            tag, suggested_name = tag_and_name

            self.variable = invoke_sched.symbol_table.find_or_create_tag(
                tag, root_name=suggested_name, symbol_type=DataSymbol,
                datatype=LFRicTypes("LFRicIntegerScalarDataType")())

        # Placeholder bound names are numbered by how many loops the
        # schedule already holds; as loops are inserted in order this gives
        # sequentially ascending names. The symbols are deliberately kept
        # out of the symbol table and prefixed with 'uninitialised' because
        # they get replaced at lowering (to be improved by TODO #2905).
        n_previous_loops = len(invoke_sched.loops())
        for suffix in ("start", "stop"):
            placeholder = DataSymbol(
                f"uninitialised_loop{n_previous_loops}_{suffix}",
                datatype=ScalarType.integer_type())
            self.addchild(Reference(placeholder))
        # The step is always one and the body starts out empty.
        self.addchild(Literal("1", ScalarType.integer_type(), parent=self))
        self.addchild(Schedule(parent=self))

        # The actual loop bounds are not known until load() or the
        # set_*_bound() methods are called.
        self._lower_bound_name = None
        self._lower_bound_index = None
        self._upper_bound_name = None
        self._upper_bound_halo_depth = None


[docs]
    def lower_to_language_level(self):
        '''Lower this DSL-level loop, in place, to generic PSyIR.

        An LFRicLoop is swapped for a plain PSyIR Loop carrying copies of
        the loop bounds, or, for a 'null' (domain) loop, for the single
        lowered child of its body. Once TODO #1731 is done (which should fix
        the loop boundaries, which currently rely on the index of the loop
        in the schedule and so can change when transformations are applied),
        this function can likely be removed.

        :returns: the lowered version of this node, or None if the loop \
            was simply detached.
        :rtype: Optional[:py:class:`psyclone.psyir.nodes.Node`]

        :raises NotImplementedError: if a 'null' loop body holds more than \
            one child after lowering.

        '''
        distributed = Config.get().distributed_memory

        if not distributed and all(
                kernel.iterates_over == "halo_cell_column"
                for kernel in self.kernels()):
            # Without distributed memory there are no halo cells, so a loop
            # containing only halo kernels has nothing to do.
            self.detach()
            return None

        # Mark halos clean/dirty for every modified field.
        if (distributed and self._loop_type != "cells_in_colour"
                and self.unique_modified_args("gh_field")):
            self.gen_mark_halos_clean_dirty()

        if self._loop_type == "null":
            # A domain loop needs no loop at all, only the kernel that is
            # held in its body.
            for child in self.loop_body.children:
                child.lower_to_language_level()
            # TODO #1010: This restriction can be removed when also lowering
            # the parent InvokeSchedule
            if len(self.loop_body.children) > 1:
                raise NotImplementedError(
                    f"Lowering LFRic domain loops that produce more than one "
                    f"children is not yet supported, but found:\n "
                    f"{self.view()}")
            sole_child = self.loop_body[0].detach()
            self.replace_with(sole_child)
            return sole_child

        # A real loop: validate the body before doing anything else.
        for child in self.loop_body.children:
            child.validate_global_constraints()

        # The bounds have to be captured BEFORE the body is lowered because
        # generating them needs kernel information.
        lower = self.start_expr.copy()
        upper = self.stop_expr.copy()
        stride = self.step_expr.copy()

        for child in self.loop_body.children:
            child.lower_to_language_level()

        # Build the language-level loop and put it in place of this node.
        new_loop = Loop.create(self._variable, lower, upper, stride, [])
        new_loop.preceding_comment = self.preceding_comment
        new_loop.loop_body._symbol_table = (
            self.loop_body.symbol_table.shallow_copy())
        new_loop.children[3] = self.loop_body.copy()
        self.replace_with(new_loop)
        return new_loop



[docs]
    def node_str(self, colour=True):
        '''Build the one-line text summary of this loop node.

        This overrides the Loop version because, in LFRic, the function
        space is an object (so orig_name has to be queried on it) and
        because the upper loop bound can be modified and is therefore
        worth reporting.

        :param bool colour: whether or not to include control codes for colour.

        :returns: text summary of this node, optionally with control codes \
                  for colour highlighting.
        :rtype: str

        '''
        label = self.coloured_name(colour)
        if self._loop_type == "null":
            return f"{label}[type='null']"

        # Append the halo depth to the bound name only when one is set.
        depth = self._upper_bound_halo_depth
        upper_bound = (f"{self._upper_bound_name}({depth})" if depth
                       else self._upper_bound_name)
        details = ", ".join([
            f"type='{self._loop_type}'",
            f"field_space='{self._field_space.orig_name}'",
            f"it_space='{self.iteration_space}'",
            f"upper_bound='{upper_bound}'"])
        return f"{label}[{details}]"



[docs]
    def load(self, kern):
        '''
        Populate the state of this Loop (field, iteration space and loop
        bounds) from the supplied Kernel object. This allows Loop objects
        to be constructed individually for a given kernel call.

        :param kern: Kernel object to use to populate state of Loop
        :type kern: :py:class:`psyclone.domain.lfric.LFRicKern`

        :raises GenerationError: if the field updated by the kernel has an \
            unexpected function space or if the kernel's 'operates-on' is \
            not consistent with the loop type.

        '''
        self._kern = kern

        # The argument that determines the iteration space also supplies
        # the field name and function space of this loop.
        space_arg = kern.arguments.iteration_space_arg()
        self._field = space_arg
        self._field_name = space_arg.name
        self._field_space = space_arg.function_space

        operates_on = kern.iterates_over  # cell_columns etc.
        if self.loop_type == "null" and operates_on != "domain":
            raise GenerationError(
                f"A LFRicLoop of type 'null' can only contain a kernel that "
                f"operates on the 'domain' but kernel '{kern.name}' operates "
                f"on '{kern.iterates_over}'.")
        self._iteration_space = operates_on

        # Every loop starts at 'start' unless changed further down.
        self.set_lower_bound("start")
        const = LFRicConstants()

        if operates_on in const.DOF_ITERATION_SPACES:
            # A loop over DoFs. Annexed dofs are included only when
            # generating DM code with the compute-annexed-dofs option set,
            # for non-reduction kernels that are not restricted to owned
            # dofs.
            include_annexed = (
                Config.get().api_conf("lfric").compute_annexed_dofs
                and Config.get().distributed_memory
                and not kern.is_reduction
                and operates_on != "owned_dof")
            self.set_upper_bound("nannexed" if include_annexed else "ndofs")
            return

        is_halo_kernel = "halo" in operates_on
        distributed = Config.get().distributed_memory

        if not distributed:
            # Sequential: always loop over all cells.
            self.set_upper_bound("ncells")
            return

        # From here on distributed memory is enabled.
        if is_halo_kernel:
            if operates_on == "halo_cell_column":
                # In LFRic, the local cell-indexing scheme is set up such
                # that owned cells have lower indices than halo cells, the
                # first halo cell starts immediately after the last owned
                # cell, and the cell indices are contiguous.
                self.set_lower_bound("cell_halo_start")
            self.set_upper_bound("cell_halo", halo_depth=kern.halo_depth)
            return

        if self._field.is_operator:
            # Operators are always computed redundantly out to the L1 halo.
            self.set_upper_bound("cell_halo", halo_depth=1)
            return

        space_name = self.field_space.orig_name
        if space_name in const.VALID_DISCONTINUOUS_NAMES:
            # All discontinuous quantities (any_discontinuous_space
            # included) only need the owned cells.
            self.set_upper_bound("ncells")
            return

        if (space_name in const.CONTINUOUS_FUNCTION_SPACES
                or space_name in const.VALID_ANY_SPACE_NAMES):
            # Continuous quantities must iterate out to the L1 halo, and an
            # any_space has to be treated as continuous since we cannot
            # know otherwise. The exception is when every updated argument
            # has 'GH_WRITE' access: that is only permitted when all writes
            # to a shared dof are guaranteed to write the same value, so
            # there is no need to go into the L1 halo to get correct values
            # for annexed dofs.
            if kern.all_updates_are_writes:
                self.set_upper_bound("ncells")
            else:
                self.set_upper_bound("cell_halo", halo_depth=1)
            return

        raise GenerationError(
            f"Unexpected function space found. Expecting one of "
            f"{const.VALID_FUNCTION_SPACES} but found "
            f"'{space_name}'")



[docs]
    def set_lower_bound(self, name, index=None):
        '''Set the lower bound of this loop.

        :param str name: loop lower-bound name. Must be one of the names in
            LFRicConstants().VALID_LOOP_BOUNDS_NAMES.
        :param index: the index associated with the bound. Must be
            supplied, and be >= 1, when the bound is 'inner' or one of the
            LFRicConstants().HALO_ACCESS_LOOP_BOUNDS names.
        :type index: Optional[int]

        :raises GenerationError: if supplied with an invalid lower-bound name.
        :raises GenerationError: if the bound requires an index and the one
            supplied is missing or < 1.

        '''
        const = LFRicConstants()
        if name not in const.VALID_LOOP_BOUNDS_NAMES:
            raise GenerationError(
                "The specified lower bound loop name is invalid")
        needs_index = name in ["inner"] + const.HALO_ACCESS_LOOP_BOUNDS
        # A missing index is reported in the same way as an out-of-range
        # one instead of failing on the 'None < 1' comparison.
        if needs_index and (index is None or index < 1):
            raise GenerationError(
                f"The specified index '{index}' for this lower loop bound is "
                f"invalid")
        self._lower_bound_name = name
        self._lower_bound_index = index



[docs]
    def set_upper_bound(self, name, halo_depth=None):
        '''Record the upper bound (and any associated halo depth) of this
        loop.

        :param str name: Loop upper-bound name. Must be a supported name.
        :param halo_depth: An optional argument indicating the depth of halo
                           that this loop accesses.
        :type halo_depth: Optional[:py:class:`psyclone.psyir.nodes.DataNode` |
                                   int]

        :raises GenerationError: if supplied with an invalid upper-bound name.
        :raises GenerationError: if supplied with a halo depth < 1.
        :raises TypeError: if the supplied halo_depth value is neither an int
                           nor a DataNode.
        '''
        const = LFRicConstants()
        valid_names = const.VALID_LOOP_BOUNDS_NAMES
        if name not in valid_names:
            raise GenerationError(
                f"The specified upper loop bound name is invalid. Expected "
                f"one of {valid_names} but found '{name}'")
        if name == "start":
            raise GenerationError("'start' is not a valid upper bound")

        # Only 'inner' and the halo bounds may carry a depth. Testing the
        # kind of bound as well as the depth itself is the safer option.
        depth_is_int = isinstance(halo_depth, int)
        bounds_with_depth = ["inner"] + const.HALO_ACCESS_LOOP_BOUNDS
        if depth_is_int and name in bounds_with_depth and halo_depth < 1:
            raise GenerationError(
                f"The specified halo depth '{halo_depth}' for this loop "
                f"upper bound is < 1 which is invalid.")

        self._upper_bound_name = name

        if halo_depth and depth_is_int:
            # An int depth is accepted as a convenience but is stored as a
            # PSyIR literal.
            self._upper_bound_halo_depth = Literal(
                f"{halo_depth}", ScalarType.integer_type())
            return

        if not (halo_depth is None or isinstance(halo_depth, DataNode)):
            raise TypeError(f"When setting the upper bound of a loop, any "
                            f"halo depth must be supplied as an int or "
                            f"PSyIR DataNode but got {type(halo_depth)}")
        self._upper_bound_halo_depth = halo_depth


    @property
    def upper_bound_name(self):
        '''
        :returns: the name of the upper loop bound as stored by
            set_upper_bound(), or None if no upper bound has been set.
        :rtype: Optional[str]

        '''
        return self._upper_bound_name

    @property
    def upper_bound_halo_depth(self):
        '''Returns the halo depth associated with the upper loop bound.

        set_upper_bound() stores this as PSyIR: an int depth is converted
        to a Literal and a DataNode is stored as supplied.

        :returns: the depth of the halo for a loops upper bound. If it \
            is None then a depth has not been provided. The depth value is \
            only valid when the upper-bound name is associated with a halo \
            e.g. 'cell_halo'.
        :rtype: Optional[:py:class:`psyclone.psyir.nodes.DataNode`]

        '''
        return self._upper_bound_halo_depth


[docs]
    def lower_bound_psyir(self) -> Node:
        '''
        Create the PSyIR expression for the lower bound of this loop. A
        'start' bound is the literal 1; any other bound is one more than the
        last cell of the preceding region, obtained by calling the relevant
        'get_last_<region>_cell' method of the mesh object.

        :returns: the PSyIR for this loop lower bound.

        :raises GenerationError: if self._lower_bound_name is not "start"
                                 for sequential code.
        :raises GenerationError: if self._lower_bound_name is unrecognised.

        '''
        if (not Config.get().distributed_memory and
                self._lower_bound_name != "start"):
            # Report the offending *lower* bound name.
            raise GenerationError(
                f"The lower bound must be 'start' if we are sequential but "
                f"found '{self._lower_bound_name}'")
        if self._lower_bound_name == "start":
            return Literal("1", ScalarType.integer_type())

        # The start of our space is the end of the previous space + 1, so
        # work out which space (and index, if any) precedes this one.
        if self._lower_bound_name == "inner":
            prev_space_name = self._lower_bound_name
            prev_space_index_str = str(self._lower_bound_index + 1)
        elif self._lower_bound_name == "ncells":
            prev_space_name = "inner"
            prev_space_index_str = "1"
        elif (self._lower_bound_name == "cell_halo" and
              self._lower_bound_index == 1):
            prev_space_name = "ncells"
            prev_space_index_str = ""
        elif self._lower_bound_name == "cell_halo_start":
            prev_space_name = "edge"
            prev_space_index_str = ""
        elif (self._lower_bound_name == "cell_halo" and
              self._lower_bound_index > 1):
            prev_space_name = self._lower_bound_name
            prev_space_index_str = str(self._lower_bound_index - 1)
        else:
            raise GenerationError(
                f"Unsupported lower bound name '{self._lower_bound_name}' "
                f"found")
        # Use the Routine-scoped SymbolTable to share the same symbol for all
        # Loops in the Invoke.
        mesh_obj = self.ancestor(Routine).symbol_table.\
            find_or_create_tag("mesh")
        call = Call.create(
                StructureReference.create(
                    mesh_obj, ["get_last_" + prev_space_name + "_cell"]))
        # The index argument is only passed for regions that have one.
        if prev_space_index_str:
            call.addchild(Literal(prev_space_index_str,
                                  ScalarType.integer_type()))
        return BinaryOperation.create(BinaryOperation.Operator.ADD,
                                      call,
                                      Literal("1", ScalarType.integer_type()))


    @property
    def _mesh_name(self):
        '''
        :returns: the name of the mesh variable from which to get the bounds \
                  for this loop.
        :rtype: str
        '''
        # self._kern may be None (as it will be for a built-in), in which
        # case this cannot be an inter-grid kernel.
        is_intergrid = self._kern and self._kern.is_intergrid

        # An inter-grid kernel has more than one mesh to choose from and we
        # want the coarse one because that determines the iteration space.
        # _field_name holds the name of the argument that determines the
        # iteration space and that is set up to be the one on the coarse
        # mesh (in LFRicKernelArguments.iteration_space_arg()). Any other
        # kernel has just the one mesh.
        tag_name = ("mesh_" + self._field_name) if is_intergrid else "mesh"

        # The symbol for the mesh will already have been added to the
        # symbol table associated with the InvokeSchedule.
        table = self.ancestor(Routine).symbol_table
        return table.lookup_with_tag(tag_name).name


[docs]
    def upper_bound_psyir(self) -> Node:
        '''
        Create the PSyIR expression for the upper bound of this loop,
        selected by the upper-bound name (and optional halo depth) held in
        self._upper_bound_name and self._upper_bound_halo_depth.

        :returns: the PSyIR for this loop upper bound.

        :raises InternalError: if the bound is 'ncolours' or 'ntilecolours'
            and the loop contains no kernels, or contains a kernel that has
            not been coloured.
        :raises GenerationError: if the bound is 'cell_halo', 'dof_halo',
            'inner', 'ntiles_per_colour_halo' or
            'ncells_per_colour_and_tile_halo' and distributed memory is
            not enabled.
        :raises GenerationError: if the upper-bound name is not supported.

        '''
        sym_tab = self.scope.symbol_table

        # Precompute halo_index as we use it in more than one of the if
        # clauses. It is None whenever no (truthy) depth has been stored.
        halo_index = None
        if self._upper_bound_halo_depth:
            halo_index = self._upper_bound_halo_depth

        def depth_or_max_halo():
            '''Return a copy of the requested halo depth or, if none was
            specified, a reference to the full halo depth of the mesh.'''
            if halo_index:
                return halo_index.copy()
            return Reference(sym_tab.find_or_create_tag(
                f"max_halo_depth_{self._mesh_name}"))

        def intergrid_tag(base):
            '''Return the supplied name, suffixed with the name of the
            iteration-space field if the kernel is an inter-grid one.'''
            if self._kern.is_intergrid:
                return base + "_" + self._field_name
            return base

        if self._upper_bound_name == "ncolours":
            # Loop over colours
            kernels = self.walk(LFRicKern)
            if not kernels:
                raise InternalError(
                    "Failed to find a kernel within a loop over colours.")
            # Check that all kernels have been coloured. We can't check the
            # number of colours since that is only known at runtime.
            for kern in kernels:
                if not kern.ncolours_var:
                    raise InternalError(
                        f"All kernels within a loop over colours must have "
                        f"been coloured but kernel '{kern.name}' has not")
            return Reference(sym_tab.lookup(kernels[0].ncolours_var))
        if self._upper_bound_name == "ntilecolours":
            # Loop over tile-colours
            kernels = self.walk(LFRicKern)
            if not kernels:
                raise InternalError(
                    "Failed to find a kernel within a loop over tile-colours.")
            # Check that all kernels have been coloured. We can't check the
            # number of colours since that is only known at runtime.
            for kern in kernels:
                if not kern.ntilecolours_var:
                    raise InternalError(
                        f"All kernels within a loop over colours must have "
                        f"been coloured but kernel '{kern.name}' has not")
            return Reference(sym_tab.lookup(kernels[0].ntilecolours_var))

        if self._upper_bound_name == "ncolour":
            # Loop over cells of a particular colour when DM is disabled.
            # We use the same, DM API as that returns sensible values even
            # when running without MPI.
            sym = sym_tab.find_or_create_tag(
                intergrid_tag("last_edge_cell_all_colours"))
            colour = sym_tab.lookup_with_tag("colours_loop_idx")
            return ArrayReference.create(sym, [Reference(colour)])
        if self._upper_bound_name == "colour_halo":
            # Loop over cells of a particular colour when DM is enabled. The
            # LFRic API used here allows for colouring with redundant
            # computation. The colouring API provides a 2D array that holds
            # the last halo cell for a given colour and halo depth.
            depth = depth_or_max_halo()
            sym = sym_tab.find_or_create_tag(
                intergrid_tag("last_halo_cell_all_colours"))
            colour = Reference(sym_tab.lookup_with_tag("colours_loop_idx"))
            return ArrayReference.create(sym, [colour, depth])
        if self._upper_bound_name in ["ndofs", "nannexed"]:
            if Config.get().distributed_memory:
                if self._upper_bound_name == "ndofs":
                    return self.field.generate_method_call(
                        "get_last_dof_owned")
                return self.field.generate_method_call("get_last_dof_annexed")
            return Reference(sym_tab.lookup(self._kern.undf_name))

        if self._upper_bound_name == "ncells":
            if Config.get().distributed_memory:
                result = Call.create(
                    StructureReference.create(
                        sym_tab.lookup(self._mesh_name),
                        ["get_last_edge_cell"]
                    )
                )
            else:
                result = self.field.generate_method_call("get_ncell")
            return result
        if self._upper_bound_name == "cell_halo":
            if Config.get().distributed_memory:
                result = Call.create(
                    StructureReference.create(
                        sym_tab.lookup(self._mesh_name),
                        ["get_last_halo_cell"]
                    )
                )
                if halo_index:
                    result.addchild(halo_index.copy())
                return result
            raise GenerationError(
                "'cell_halo' is not a valid loop upper bound for "
                "sequential/shared-memory code")
        if self._upper_bound_name == "dof_halo":
            if Config.get().distributed_memory:
                result = self.field.generate_method_call("get_last_dof_halo")
                if halo_index:
                    result.addchild(halo_index.copy())
                return result
            raise GenerationError(
                "'dof_halo' is not a valid loop upper bound for "
                "sequential/shared-memory code")
        if self._upper_bound_name == "inner":
            if Config.get().distributed_memory:
                result = Call.create(
                    StructureReference.create(
                        sym_tab.lookup(self._mesh_name),
                        ["get_last_inner_cell"]
                    )
                )
                # As in every other branch, attach a copy so that the depth
                # node stored on this loop is not re-parented into the
                # generated call. A missing depth is passed through
                # unchanged so that addchild deals with it as before.
                result.addchild(
                    halo_index.copy() if halo_index else halo_index)
                return result
            raise GenerationError(
                "'inner' is not a valid loop upper bound for "
                "sequential/shared-memory code")
        if self._upper_bound_name == "ntiles_per_colour":
            result = ArrayReference.create(
                sym_tab.lookup_with_tag(
                    intergrid_tag("last_edge_tile_per_colour")),
                [Reference(sym_tab.lookup_with_tag("colours_loop_idx"))]
            )
            return result
        if self._upper_bound_name == "ncells_per_colour_and_tile":
            result = ArrayReference.create(
                sym_tab.lookup_with_tag(
                    intergrid_tag("last_edge_cell_per_colour_and_tile")),
                [Reference(sym_tab.lookup_with_tag("colours_loop_idx")),
                 Reference(sym_tab.lookup_with_tag("tile_loop_idx"))]
            )
            return result
        if self._upper_bound_name == "ntiles_per_colour_halo":
            depth = depth_or_max_halo()
            if Config.get().distributed_memory:
                result = ArrayReference.create(
                    sym_tab.lookup_with_tag(
                        intergrid_tag("last_halo_tile_per_colour")),
                    [Reference(sym_tab.lookup_with_tag("colours_loop_idx")),
                     depth]
                )
                return result
            raise GenerationError(
                "'last_halo_tile_per_colour' is not a valid loop upper bound "
                "for non-distributed-memory code")
        if self._upper_bound_name == "ncells_per_colour_and_tile_halo":
            depth = depth_or_max_halo()
            if Config.get().distributed_memory:
                result = ArrayReference.create(
                    sym_tab.lookup_with_tag(
                        intergrid_tag("last_halo_cell_per_colour_and_tile")),
                    [Reference(sym_tab.lookup_with_tag("colours_loop_idx")),
                     Reference(sym_tab.lookup_with_tag("tile_loop_idx")),
                     depth]
                )
                return result
            raise GenerationError(
                "'last_halo_cell_per_colour_and_tile' is not a valid loop "
                "upper bound for non-distributed-memory code")
        # NOTE(review): the message below refers to 'upper_bound_fortran()'
        # rather than this method's name; the text is left unchanged in case
        # anything matches on it.
        raise GenerationError(
            f"Unsupported upper bound name '{self._upper_bound_name}' found "
            f"in lfricloop.upper_bound_fortran()")


    def _halo_read_access(self, arg):
        '''
        Reports whether the halo of the supplied argument is, or could be,
        read within this loop.

        :param arg: an argument contained within this loop.
        :type arg: :py:class:`psyclone.lfric.LFRicArgument`

        :returns: True if the halo of the argument is (or might be) read \
            and False if it is definitely not read.
        :rtype: bool

        :raises GenerationError: if an unsupported upper loop bound name is \
            provided for kernels with stencil access.
        :raises InternalError: if an unsupported field access is found.
        :raises InternalError: if an unsupported argument type is found.

        '''
        const = LFRicConstants()
        if arg.is_scalar or arg.is_operator or arg.is_scalar_array:
            # These kinds of argument are treated as having no halo.
            return False
        if not arg.is_field:
            raise InternalError(
                f"Expecting arg '{arg.name}' to be an operator, scalar or "
                f"field, but found '{arg.argument_type}'.")

        # From here on we are dealing with a field.
        if arg.access == AccessType.WRITE:
            # A pure write does not read anything.
            return False
        if arg.access not in AccessType.all_read_accesses():
            raise InternalError(
                f"Unexpected field access type '{arg.access}' found for arg "
                f"'{arg.name}'.")

        bound_name = self._upper_bound_name
        if arg.descriptor.stencil:
            # Stencil read access.
            if bound_name not in ("cell_halo", "ncells"):
                raise GenerationError(
                    f"Loop bounds other than 'cell_halo' and 'ncells' "
                    f"are currently unsupported for kernels with "
                    f"stencil accesses. Found "
                    f"'{self._upper_bound_name}'.")
            # With 'cell_halo' the halo may be accessed whatever the
            # stencil is, and with 'ncells' the stencil itself may reach
            # into the halo.
            return True

        # Non-stencil read access.
        if bound_name in const.HALO_ACCESS_LOOP_BOUNDS:
            # The loop iterates into the halo so it might be read.
            return True

        # When a kernel updates fields on both continuous and discontinuous
        # spaces, the iteration space is taken to be the continuous one.
        iter_arg = self.kernel.arguments.iteration_space_arg()
        if arg.discontinuous:
            # Neither of the remaining (annexed-dof related) cases applies.
            return False

        if (not iter_arg.discontinuous and
                self.kernel.iterates_over == "cell_column" and
                self.kernel.all_updates_are_writes and
                bound_name == "ncells"):
            # Special case: the kernel guarantees to write the same value
            # to any given dof, irrespective of cell column.
            return False

        if bound_name in ("ncells", "nannexed"):
            # Annexed dofs may be accessed. They are part of the level 1
            # halo, so the halo is only read when annexed dofs are *not*
            # always computed.
            lfric_config = Config.get().api_conf("lfric")
            return not lfric_config.compute_annexed_dofs

        # The halo is not accessed.
        return False

    def _add_field_component_halo_exchange(self, halo_field, idx=None):
        '''An internal helper method to add the halo exchange call immediately
        before this loop using the halo_field argument for the
        associated field information and the optional idx argument if
        the field is a vector field.

        In certain situations the halo exchange will not be
        required. This is dealt with by adding the halo exchange,
        asking it if it is required and then removing it if it is
        not. This may seem strange but the logic for determining
        whether a halo exchange is required is within the halo
        exchange class so it is simplest to do it this way.

        :param halo_field: the argument requiring a halo exchange
        :type halo_field: :py:class:`psyclone.lfric.LFRicArgument`
        :param idx: optional argument providing the vector index.
        :type idx: Optional[int]

        :raises InternalError: if there are two forward write
            dependencies and they are both associated with halo
            exchanges.

        '''
        # Avoid circular import
        # pylint: disable=import-outside-toplevel
        from psyclone.lfric import LFRicHaloExchange
        exchange = LFRicHaloExchange(halo_field,
                                     parent=self.parent,
                                     vector_index=idx)
        # Place the new exchange at this loop's current position in its
        # parent, i.e. immediately before the loop.
        self.parent.children.insert(self.position, exchange)

        # Is this halo exchange required? The halo exchange being
        # added may replace an existing halo exchange, which would
        # then be returned as a halo exchange dependence and an
        # exception raised (as a halo exchange should not have another
        # halo exchange as a dependence). Therefore, halo exchange
        # dependencies are ignored here by setting the ignore_hex_dep
        # optional argument.
        required, _ = exchange.required(ignore_hex_dep=True)
        if not required:
            exchange.detach()
            return

        # The halo exchange we have added may be replacing an existing
        # one. If so, the one being replaced will be the first and only
        # write dependence encountered and must be removed.
        results = exchange.field.forward_write_dependencies()
        if not results:
            return
        first_dep_call = results[0].call
        if not isinstance(first_dep_call, HaloExchange):
            return

        # Sanity check. If the first dependence is a field accessed
        # within a halo exchange then the subsequent one must not be.
        next_results = results[0].forward_write_dependencies()
        if any(isinstance(dep.call, HaloExchange) for dep in next_results):
            raise InternalError(
                f"When replacing a halo exchange with another one "
                f"for field {exchange.field.name}, a subsequent "
                f"dependent halo exchange was found. This should "
                f"never happen.")
        first_dep_call.detach()

    def _add_halo_exchange(self, halo_field):
        '''Internal helper method that requests the halo exchange call(s)
        for the supplied field immediately before this loop. A vector
        field results in one request per vector component; any other
        field results in a single request.

        :param halo_field: the argument requiring a halo exchange
        :type halo_field: :py:class:`psyclone.lfric.LFRicArgument`

        '''
        if halo_field.vector_size <= 1:
            # Not a vector so there is just the one component to deal with.
            self._add_field_component_halo_exchange(halo_field)
            return

        # Component indices run from 1 to the vector size (inclusive) as
        # that is what is required in the generated Fortran code.
        for component in range(1, halo_field.vector_size + 1):
            self._add_field_component_halo_exchange(halo_field, component)


[docs]
    def update_halo_exchanges(self):
        '''Adds and/or removes halo exchanges following a change to the
        bounds of this loop.

        New halo exchanges are created before this loop by
        create_halo_exchanges(). Then, for every argument written by a
        kernel within this loop, any halo exchange that is a forward read
        dependence is asked whether it is still required and is detached
        if it is not.

        '''
        # Add any halo exchanges that are now needed before this loop, i.e.
        # for fields in the loop that require one and don't already have
        # one.
        self.create_halo_exchanges()

        # Now remove any existing halo exchanges after this loop that are
        # no longer required: those where a field in this loop previously
        # had a forward dependence on a halo exchange but no longer does.
        # Avoid circular import
        # pylint: disable=import-outside-toplevel
        from psyclone.lfric import LFRicHaloExchange
        for kern in self.kernels():
            for kern_arg in kern.arguments.args:
                if kern_arg.access not in AccessType.all_write_accesses():
                    # Only arguments that are written are of interest.
                    continue
                for reader in kern_arg.forward_read_dependencies():
                    hex_node = reader.call
                    if not isinstance(hex_node, LFRicHaloExchange):
                        continue
                    # Found a halo exchange as a forward dependence so
                    # ask it whether it is (still) required.
                    still_needed, _ = hex_node.required()
                    if not still_needed:
                        hex_node.detach()



[docs]
    def create_halo_exchanges(self):
        '''Add halo exchanges before this loop as required by fields within
        this loop. To keep the logic simple we assume that any field
        that accesses the halo will require a halo exchange and then
        remove the halo exchange if this is not the case (when
        previous writers perform sufficient redundant computation). It
        is implemented this way as the halo exchange class determines
        whether it is required or not so a halo exchange needs to
        exist in order to find out. The appropriate logic is coded in
        the _add_halo_exchange helper method. In some cases a new halo
        exchange will replace an existing one. In this situation that
        routine also removes the old one.

        :raises GenerationError: if a field has more than one previous
            write dependence but is not a vector.
        :raises GenerationError: if the number of previous write
            dependencies of a vector field differs from its vector size.
        :raises GenerationError: if a field has more than one previous
            write dependence and any of them is not a halo exchange.

        '''
        # Avoid circular import
        # pylint: disable=import-outside-toplevel
        from psyclone.lfric import LFRicHaloExchange

        for halo_field in self.unique_fields_with_halo_reads():
            # for each unique field in this loop that has its halo
            # read (including annexed dofs), find the previous update
            # of this field
            prev_arg_list = halo_field.backward_write_dependencies()
            if not prev_arg_list:
                # field has no previous dependence so create new halo
                # exchange(s) as we don't know the state of the fields
                # halo on entry to the invoke
                self._add_halo_exchange(halo_field)
                continue

            # field has one or more previous dependencies
            if len(prev_arg_list) > 1:
                # field has more than one previous dependencies so
                # should be a vector
                if halo_field.vector_size <= 1:
                    raise GenerationError(
                        f"Error in create_halo_exchanges. Expecting field "
                        f"'{halo_field.name}' to be a vector as it has "
                        f"multiple previous dependencies")
                if len(prev_arg_list) != halo_field.vector_size:
                    raise GenerationError(
                        f"Error in create_halo_exchanges. Expecting a "
                        f"dependence for each vector index for field "
                        f"'{halo_field.name}' but the number of "
                        f"dependencies is '{len(prev_arg_list)}' and "
                        f"the vector size is '{halo_field.vector_size}'.")
                for arg in prev_arg_list:
                    if not isinstance(arg.call, LFRicHaloExchange):
                        raise GenerationError(
                            "Error in create_halo_exchanges. Expecting "
                            "all dependent nodes to be halo exchanges")

            prev_node = prev_arg_list[0].call
            if not isinstance(prev_node, LFRicHaloExchange):
                # previous dependence is not a halo exchange so
                # call the add halo exchange logic which
                # determines whether a halo exchange is required
                # or not
                self._add_halo_exchange(halo_field)



[docs]
    def gen_mark_halos_clean_dirty(self):
        '''
        Inserts, after this loop, the calls that mark the halos of every
        field modified by the loop as dirty and/or clean.
        '''
        def component_calls(symbol, num_components, method):
            '''Yields a Call to `method` for each component of the field
            proxy `symbol` (a single Call if it is not a vector).'''
            if num_components > 1:
                # Component indices run from 1 to the vector size, as
                # required in the generated Fortran code.
                for component in range(1, num_components + 1):
                    yield Call.create(ArrayOfStructuresReference.create(
                        symbol,
                        [Literal(str(component), ScalarType.integer_type())],
                        [method]))
            else:
                yield Call.create(
                    StructureReference.create(symbol, [method]))

        # Halos are set clean/dirty for all fields that are modified
        modified_fields = self.unique_modified_args("gh_field")
        table = self.ancestor(Routine).symbol_table

        # Climb out of any enclosing directives or loops so that the new
        # calls are placed after the outermost of them.
        anchor = self
        while isinstance(anchor.parent.parent, (Directive, Loop)):
            anchor = anchor.parent.parent
        first_slot = anchor.position
        schedule = anchor.parent
        slot = first_slot

        for field in modified_fields:
            proxy_symbol = table.find_or_create(
                field.proxy_name,
                symbol_type=DataSymbol,
                datatype=UnresolvedType(),
                interface=AutomaticInterface())
            # Avoid circular import
            # pylint: disable=import-outside-toplevel
            from psyclone.lfric import HaloWriteAccess
            # HaloWriteAccess describes how the supplied field is accessed
            # within its parent loop
            write_access = HaloWriteAccess(field, self)

            # First set all of the halo dirty unless we are subsequently
            # going to set all of the halo clean
            if not write_access.max_depth or write_access.dirty_outer:
                for dirty_call in component_calls(
                        proxy_symbol, field.vector_size, "set_dirty"):
                    slot += 1
                    schedule.addchild(dirty_call, slot)

            # Now set appropriate parts of the halo clean where redundant
            # computation has been performed or a kernel is written to
            # operate on halo cells.
            depth = write_access.clean_depth
            if depth:
                for clean_call in component_calls(
                        proxy_symbol, field.vector_size, "set_clean"):
                    clean_call.addchild(depth.copy())
                    slot += 1
                    schedule.addchild(clean_call, slot)

        if slot <= first_slot:
            # Nothing was added so there are no comments to tidy up.
            return

        # Blank any earlier "Set halos dirty..." comments from the loop
        # position onwards and put a single comment on the first new call.
        for node in schedule.children[first_slot:]:
            if node.preceding_comment.startswith("Set halos dirty"):
                node.preceding_comment = ""
        schedule[first_slot + 1].preceding_comment = (
            "Set halos dirty/clean for fields modified in the above "
            "loop(s)")



[docs]
    def independent_iterations(self,
                               test_all_variables=False,
                               signatures_to_ignore=None,
                               dep_tools=None):
        '''
        LFRic-specific override of the default method in the Loop class
        which applies domain-specific rules when deciding whether or not
        the iterations of this loop are independent.

        :param bool test_all_variables: if True, it will test if all variable
            accesses are independent, otherwise it will stop after the first
            variable access is found that isn't.
        :param signatures_to_ignore: list of signatures for which to skip
            the access checks.
        :type signatures_to_ignore: Optional[
            List[:py:class:`psyclone.core.Signature`]]
        :param dep_tools: an optional instance of DependencyTools so that the
            caller can access any diagnostic messages detailing why the loop
            iterations are not independent.
        :type dep_tools: Optional[
            :py:class:`psyclone.psyir.tools.DependencyTools`]

        :returns: True if the loop iterations are independent, False otherwise.
        :rtype: bool

        :raises InternalError: if the type of this loop is not supported.

        '''
        # pylint: disable=import-outside-toplevel
        from psyclone.psyir.tools import DependencyTools, DTCode
        dtools = dep_tools if dep_tools else DependencyTools()

        if self.loop_type in ("null", "colours"):
            # These loops can never be parallelised: "null" means that
            # there is no actual loop and "colours" is the *outer* loop
            # over the different colours used (it is the inner,
            # "cells_in_colour" loop over cells of a single colour which
            # can be parallelised).
            return False

        try:
            generic_ok = dtools.can_loop_be_parallelised(
                self, test_all_variables=test_all_variables,
                signatures_to_ignore=signatures_to_ignore)
            if generic_ok:
                return True
        except (InternalError, KeyError):
            # LFRic still has symbols that don't exist in the symbol_table
            # until the lowering step, so the dependency analysis raises
            # errors in some cases.
            # TODO #2874 - when a transformation colours a loop we must
            # ensure "last_[halo]_cell_all_colours" is added to the symbol
            # table.
            return True

        # The generic dependence analysis says that this loop cannot be
        # parallelised. Domain-specific information is used to qualify this.
        kind = self.loop_type

        if kind == "cells_in_colour":
            # A loop over cells of a single colour: according to the LFRic
            # rules this is safe to parallelise.
            return True

        if kind == "dof":
            # The generic analysis can't see the PSyIR of this Builtin (it
            # hasn't been lowered to language level) so domain-specific
            # knowledge about its properties is used instead.
            if not self.kernel.is_reduction:
                return True
            dtools._add_message(
                f"Builtin '{self.kernel.name}' performs a reduction",
                DTCode.WARN_SCALAR_REDUCTION)
            return False

        if kind == "":
            # A non-coloured loop can be parallelised if it only updates
            # quantities on discontinuous function spaces. An LFRic kernel
            # that updates quantities on a continuous function space must
            # have at least one argument with GH_INC access, so checking
            # for such an argument is enough to infer the continuity of
            # the space.
            if not self.has_inc_arg():
                return True
            dtools._add_message(
                f"Kernel '{self.kernel.name}' performs an INC update",
                DTCode.ERROR_WRITE_WRITE_RACE)
            return False

        raise InternalError(f"independent_iterations: loop of type "
                            f"'{self.loop_type}' is not supported.")




# ---------- Documentation utils -------------------------------------------- #
# The list of module members that we wish AutoAPI to generate
# documentation for. As this is the module's `__all__`, it is also the
# list of names exported by `from <this module> import *`.
__all__ = ['LFRicLoop']